From: Emilio López emilio.lopez@collabora.co.uk
If a sw_sync_timeline is destroyed the fences associated to it need to be signalled. This test checks that.
Cc: Shuah Khan shuah@kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Emilio López emilio.lopez@collabora.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com --- tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/sync_wait.c | 58 ++++++++++++++++++++++++++++++++ tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 60 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 62fa666e501a..5d93c9dcc290 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -79,6 +79,7 @@ int main(void) err += RUN_TEST(test_fence_one_timeline_merge); err += RUN_TEST(test_fence_merge_same_fence); err += RUN_TEST(test_fence_multi_timeline_wait); + err += RUN_TEST(test_fence_wait_on_destroyed_timeline); err += RUN_TEST(test_stress_two_threads_shared_timeline); err += RUN_TEST(test_consumer_stress_multi_producer_single_consumer); err += RUN_TEST(test_merge_stress_random_merge); diff --git a/tools/testing/selftests/sync/sync_wait.c b/tools/testing/selftests/sync/sync_wait.c index d69b752f6550..82ad9f519959 100644 --- a/tools/testing/selftests/sync/sync_wait.c +++ b/tools/testing/selftests/sync/sync_wait.c @@ -25,6 +25,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */
+#include <pthread.h> #include "sync.h" #include "sw_sync.h" #include "synctest.h" @@ -89,3 +90,60 @@ int test_fence_multi_timeline_wait(void)
return 0; } + +struct fds_test { + int timeline; + int fencesig, fencekill; + int result; +}; + +static int test_fence_wait_on_destroyed_timeline_thread(void *d) +{ + struct fds_test *data = d; + int ret; + + /* in case of errors */ + data->result = 1; + + ret = sw_sync_timeline_inc(data->timeline, 100); + ASSERT(ret == 0, "Failure advancing timeline\n"); + + ret = sync_wait(data->fencekill, -1); + ASSERT(ret == 1, "Failure waiting on fence\n"); + + /* no errors occurred */ + data->result = 0; + return 0; +} + +int test_fence_wait_on_destroyed_timeline(void) +{ + struct fds_test data; + pthread_t thread; + int valid; + + data.timeline = sw_sync_timeline_create(); + valid = sw_sync_timeline_is_valid(data.timeline); + ASSERT(valid, "Failure allocating timeline\n"); + + data.fencesig = sw_sync_fence_create(data.timeline, "allocFence", 100); + data.fencekill = sw_sync_fence_create(data.timeline, "allocFence", 200); + + /* Spawn a thread to wait on a fence when the timeline is killed */ + pthread_create(&thread, NULL, (void *(*)(void *)) + test_fence_wait_on_destroyed_timeline_thread, &data); + + /* Wait for the thread to spool up */ + sync_wait(data.fencesig, -1); + + /* Kill the timeline */ + sw_sync_timeline_destroy(data.timeline); + + /* wait for the thread to clean up */ + pthread_join(thread, NULL); + + sw_sync_fence_destroy(data.fencesig); + sw_sync_fence_destroy(data.fencekill); + + return data.result; +} diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h index e7d1d57dba7a..1cbe1e3658b3 100644 --- a/tools/testing/selftests/sync/synctest.h +++ b/tools/testing/selftests/sync/synctest.h @@ -53,6 +53,7 @@ int test_fence_merge_same_fence(void);
/* Fence wait tests */ int test_fence_multi_timeline_wait(void); +int test_fence_wait_on_destroyed_timeline(void);
/* Stress test - parallelism */ int test_stress_two_threads_shared_timeline(void);
From: Dominik Behr dbehr@chromium.org
To avoid hanging userspace components that might have been waiting on the active fences of the destroyed timeline we need to signal with error all remaining fences on such timeline.
This restore the default behaviour of the Android sw_sync framework, which Android still relies on. It was broken on the dma fence conversion a few years ago and never fixed.
Signed-off-by: Dominik Behr dbehr@chromium.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com --- drivers/dma-buf/sw_sync.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 38cc7389a6c1..a85198c75a1b 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -321,9 +321,21 @@ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data; + struct sync_pt *pt, *next;
smp_wmb();
+ spin_lock_irq(&obj->lock); + + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + dma_fence_set_error(&pt->base, -ENOENT); + dma_fence_signal_locked(&pt->base); + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + } + + spin_unlock_irq(&obj->lock); + sync_timeline_put(obj); return 0; }
Quoting Gustavo Padovan (2017-09-05 00:06:04)
From: Dominik Behr dbehr@chromium.org
To avoid hanging userspace components that might have been waiting on the active fences of the destroyed timeline we need to signal with error all remaining fences on such timeline.
This restore the default behaviour of the Android sw_sync framework, which Android still relies on. It was broken on the dma fence conversion a few years ago and never fixed.
Signed-off-by: Dominik Behr dbehr@chromium.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
drivers/dma-buf/sw_sync.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 38cc7389a6c1..a85198c75a1b 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -321,9 +321,21 @@ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data;
struct sync_pt *pt, *next; smp_wmb();
spin_lock_irq(&obj->lock);
list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
dma_fence_set_error(&pt->base, -ENOENT);
dma_fence_signal_locked(&pt->base);
list_del_init(&pt->link);
rb_erase(&pt->node, &obj->pt_tree);
Since the timeline is dead, we don't need to worry about trimming the completed signals from the timeline. No need for the list_del/rb_erase here, especially considering the trickiness required to avoid the potential deadlock which is missing here. -Chris
dri-devel@lists.freedesktop.org