Peter Zijlstra gave a lot of feedback and, thanks to him, I think both the function names and docs are a lot saner. There is also a good consensus on renaming dma-buf's struct fence to be struct dma_fence, allowing for use of the cleaner name for the core struct.
A quick overview of a fence is that it is a struct completion that waits on multiple events (rather than just one). -Chris
Completions are a simple synchronization mechanism, suitable for 1:M barriers where many waiters may be waiting for a single event. However, some event-driven mechanisms require a graph of events where one event may depend upon several earlier events. The kfence extends the struct completion to be able to asynchronously wait upon several event sources, including completions and other fences, forming the basis on which an acyclic dependency graph can be built. Most often this is used to create a set of interdependent tasks that can be run concurrently but yet serialised where necessary. For example, the kernel init sequence has many tasks that could be run in parallel so long as their dependencies on previous tasks have been completed. Similarly we have the problem of assigning interdependent tasks to multiple hardware execution engines, be they used for rendering or for display. kfences provide a building block which can be used for determining an order in which tasks can execute.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- include/linux/kfence.h | 64 ++++ kernel/Makefile | 2 +- kernel/kfence.c | 431 +++++++++++++++++++++++++++ lib/Kconfig.debug | 23 ++ lib/Makefile | 1 + lib/test-kfence.c | 536 ++++++++++++++++++++++++++++++++++ tools/testing/selftests/lib/kfence.sh | 10 + 7 files changed, 1066 insertions(+), 1 deletion(-) create mode 100644 include/linux/kfence.h create mode 100644 kernel/kfence.c create mode 100644 lib/test-kfence.c create mode 100755 tools/testing/selftests/lib/kfence.sh
diff --git a/include/linux/kfence.h b/include/linux/kfence.h new file mode 100644 index 000000000000..6e32385b3b8c --- /dev/null +++ b/include/linux/kfence.h @@ -0,0 +1,64 @@ +/* + * kfence.h - library routines for N:M synchronisation points + * + * Copyright (C) 2016 Intel Corporation + * + * This file is released under the GPLv2. + * + */ + +#ifndef _KFENCE_H_ +#define _KFENCE_H_ + +#include <linux/gfp.h> +#include <linux/kref.h> +#include <linux/notifier.h> /* for NOTIFY_DONE */ +#include <linux/wait.h> + +struct completion; + +struct kfence { + wait_queue_head_t wait; + unsigned long flags; + struct kref kref; + atomic_t pending; +}; + +#define KFENCE_CHECKED_BIT 0 /* used internally for DAG checking */ +#define KFENCE_PRIVATE_BIT 1 /* available for use by owner */ +#define KFENCE_MASK (~3) + +#define __kfence_call __aligned(4) +typedef int (*kfence_notify_t)(struct kfence *); + +void kfence_init(struct kfence *fence, kfence_notify_t fn); + +struct kfence *kfence_get(struct kfence *fence); +void kfence_put(struct kfence *fence); + +void kfence_await(struct kfence *fence); +int kfence_await_kfence(struct kfence *fence, + struct kfence *after, + gfp_t gfp); +int kfence_await_completion(struct kfence *fence, + struct completion *x, + gfp_t gfp); +void kfence_complete(struct kfence *fence); +void kfence_wake_up_all(struct kfence *fence); +void kfence_wait(struct kfence *fence); + +/** + * kfence_done - report when the fence has been passed + * @fence: the kfence to query + * + * kfence_done() reports true when the fence is no longer waiting for any + * events and has completed its fence-complete notification. + * + * Returns true when the fence has been passed, false otherwise. 
+ */ +static inline bool kfence_done(const struct kfence *fence) +{ + return atomic_read(&fence->pending) < 0; +} + +#endif /* _KFENCE_H_ */ diff --git a/kernel/Makefile b/kernel/Makefile index e2ec54e2b952..ff11f31b7ec9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o + async.o kfence.o range.o smpboot.o
obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/kfence.c b/kernel/kfence.c new file mode 100644 index 000000000000..693af9da545a --- /dev/null +++ b/kernel/kfence.c @@ -0,0 +1,431 @@ +/* + * (C) Copyright 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/kfence.h> +#include <linux/slab.h> + +/** + * DOC: kfence overview + * + * kfences provide synchronisation barriers between multiple tasks. They are + * very similar to completions, or the OpenGL fence synchronisation object. + * Where kfences differ from completions is their ability to track multiple + * event sources rather than being a singular "completion event". Similar to + * completions multiple processes can wait upon a kfence. However, unlike + * completions, a kfence can wait upon other kfences allowing for a graph + * of interdependent events. + * + * Each kfence is a one-shot flag, signaling that work has progressed past + * a certain point (as measured by completion of all events the kfence is + * listening for) and the waiters upon that kfence may proceed. + * + * kfences provide both signaling and waiting routines: + * + * - kfence_await(): indicates that the kfence is asynchronously waiting for + * another event. + * + * - kfence_complete(): undoes the earlier await and marks the fence as done + * if all of its pending events have been completed. + * + * - kfence_done(): reports whether or not the kfence has been passed. + * + * - kfence_wait(): allows the caller to sleep (uninterruptibly) until the + * fence is passed. + * + * This interface is very similar to completions, with the exception of + * allowing the fence to await multiple events. 
kfences can wait upon other + * fences or other hardware events, building an ordered dependency graph: + * + * - kfence_await_kfence(): the kfence asynchronously waits upon completion + * of another kfence + * + * - kfence_await_completion(): the kfence asynchronously waits upon a + * completion + * + * A kfence is initialised using kfence_init(), and starts off awaiting an + * event. Once you have finished setting up the fence, including adding + * all of its asynchronous waits, call kfence_complete(). + * + * Unlike completions, kfences are expected to live inside more complex graphs + * and form the basis for parallel execution of interdependent tasks and so are + * reference counted. Use kfence_get() and kfence_put() to acquire or release + * a reference to the kfence respectively. + * + * The kfence can be embedded inside a larger structure and be used as part + * of its event driven mechanism. As such kfence_init() can be passed a + * callback function that will be called first when the kfence is completed, + * and again when the kfence is to be freed. If no callback is provided, the + * kfence will be freed using kfree() when its reference count hits zero - + * if it is embedded inside another structure and no callback is provided, + * it must be the first member of its parent struct. + * + * The fence-completed notification is called before any listeners upon the + * fence are signaled, or any waiters woken. You can defer their wake up by + * returning NOTIFY_OK from the fence-completed notification and calling + * kfence_wake_up_all() later when ready. 
+ */ + +static DEFINE_SPINLOCK(kfence_lock); + +static int __kfence_notify(struct kfence *fence) +{ + kfence_notify_t fn; + + fn = (kfence_notify_t)(fence->flags & KFENCE_MASK); + return fn(fence); +} + +static void kfence_free(struct kref *kref) +{ + struct kfence *fence = container_of(kref, typeof(*fence), kref); + + WARN_ON(atomic_read(&fence->pending) > 0); + + if (fence->flags & KFENCE_MASK) + WARN_ON(__kfence_notify(fence) != NOTIFY_DONE); + else + kfree(fence); +} + +/** + * kfence_put - release a reference to a kfence + * @fence: the kfence being disposed of + * + * kfence_put() decrements the reference count on the @fence, and when + * it hits zero the fence will be freed. + */ +void kfence_put(struct kfence *fence) +{ + kref_put(&fence->kref, kfence_free); +} +EXPORT_SYMBOL_GPL(kfence_put); + +/** + * kfence_get - acquire a reference to a kfence + * @fence: the kfence being used + * + * Returns the pointer to the kfence, with its reference count incremented. + */ +struct kfence *kfence_get(struct kfence *fence) +{ + kref_get(&fence->kref); + return fence; +} +EXPORT_SYMBOL_GPL(kfence_get); + +static void __kfence_wake_up_all(struct kfence *fence, + struct list_head *continuation) +{ + wait_queue_head_t *x = &fence->wait; + unsigned long flags; + + atomic_dec(&fence->pending); + + /* + * To prevent unbounded recursion as we traverse the graph of kfences, + * we move the task_list from this the next ready fence to the tail of + * the original fence's task_list (and so added to the list to be + * woken). 
+ */ + smp_mb__before_spinlock(); + spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation); + if (continuation) { + list_splice_tail_init(&x->task_list, continuation); + } else { + while (!list_empty(&x->task_list)) + __wake_up_locked_key(x, TASK_NORMAL, &x->task_list); + } + spin_unlock_irqrestore(&x->lock, flags); +} + +/** + * kfence_wake_up_all - wake all waiters upon a fence + * @fence: the kfence to signal + * + * If the fence-complete notification is deferred, when the callback is + * complete it should call kfence_wake_up_all() to wake up all waiters + * upon the fence. + * + * It is invalid to call kfence_wake_up_all() at any time other than from + * inside a deferred fence-complete notification. + */ +void kfence_wake_up_all(struct kfence *fence) +{ + WARN_ON(atomic_read(&fence->pending) != 0); + __kfence_wake_up_all(fence, NULL); +} + +static void __kfence_complete(struct kfence *fence, + struct list_head *continuation) +{ + if (!atomic_dec_and_test(&fence->pending)) + return; + + if (fence->flags & KFENCE_MASK && __kfence_notify(fence) != NOTIFY_DONE) + return; + + __kfence_wake_up_all(fence, continuation); +} + +/** + * kfence_await - increment the count of events being asynchronously waited upon + * @fence: the kfence + * + * kfence_await() indicates that the @fence is waiting upon the completion + * of an event. The @fence may wait upon multiple events concurrently. + * When that event is complete, a corresponding call to kfence_complete() + * should be made. + */ +void kfence_await(struct kfence *fence) +{ + WARN_ON(atomic_inc_return(&fence->pending) <= 1); +} +EXPORT_SYMBOL_GPL(kfence_await); + +/** + * kfence_complete - decrement the count of events waited upon + * @fence: the kfence + * + * When all event sources for the @fence are completed, i.e. the event count + * hits zero, all waiters upon the @fence are woken up. 
+ */ +void kfence_complete(struct kfence *fence) +{ + if (WARN_ON(kfence_done(fence))) + return; + + __kfence_complete(fence, NULL); +} +EXPORT_SYMBOL_GPL(kfence_complete); + +/** + * kfence_wait - wait upon a fence to be completed + * @fence: the kfence to wait upon + * + * Blocks (uninterruptibly waits) until the @fence event counter reaches zero + * and then also waits for the fence-completed notification to finish. + */ +void kfence_wait(struct kfence *fence) +{ + wait_event(fence->wait, kfence_done(fence)); +} +EXPORT_SYMBOL_GPL(kfence_wait); + +/** + * kfence_init - initialize a fence for embedded use within a struct + * @fence: this kfence + * @fn: a callback function for when the fence is complete, and when the + * fence is released + * + * This function initialises the @fence for use embedded within a parent + * structure. The optional @fn hook is first called when the fence is completed + * (when all its pending event count hits 0) and again when the fence is + * to be freed. Note that the @fn will be called from atomic context. The @fn + * is stored inside the fence mixed with some flags, and so the @fn must + * be aligned using the __kfence_call function attribute. + * + * If the @fn is not provided, the kfence must be the first member in its + * parent struct as it will be freed using kfree(). + * + * fence-complete notification: @fn will be called when the pending event + * count hits 0, however the fence is not completed unless the callback + * returns NOTIFY_DONE. During this notification callback fence_done() reports + * false. You can suspend completion of the fence by returning + * NOTIFY_OK instead and then later calling kfence_wake_up_all(). + * + * fence-release notification: @fn will be called when the reference count + * hits 0, fence_done() will report true. 
+ */ +void kfence_init(struct kfence *fence, kfence_notify_t fn) +{ + BUG_ON((unsigned long)fn & ~KFENCE_MASK); + + init_waitqueue_head(&fence->wait); + kref_init(&fence->kref); + atomic_set(&fence->pending, 1); + fence->flags = (unsigned long)fn; +} +EXPORT_SYMBOL_GPL(kfence_init); + +static int kfence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) +{ + list_del(&wq->task_list); + __kfence_complete(wq->private, key); + kfence_put(wq->private); + kfree(wq); + return 0; +} + +static bool __kfence_check_if_after(struct kfence *fence, + const struct kfence * const signaler) +{ + wait_queue_t *wq; + + if (__test_and_set_bit(KFENCE_CHECKED_BIT, &fence->flags)) + return false; + + if (fence == signaler) + return true; + + list_for_each_entry(wq, &fence->wait.task_list, task_list) { + if (wq->func != kfence_wake) + continue; + + if (__kfence_check_if_after(wq->private, signaler)) + return true; + } + + return false; +} + +static void __kfence_clear_checked_bit(struct kfence *fence) +{ + wait_queue_t *wq; + + if (!__test_and_clear_bit(KFENCE_CHECKED_BIT, &fence->flags)) + return; + + list_for_each_entry(wq, &fence->wait.task_list, task_list) { + if (wq->func != kfence_wake) + continue; + + __kfence_clear_checked_bit(wq->private); + } +} + +static bool kfence_check_if_after(struct kfence *fence, + const struct kfence * const signaler) +{ + unsigned long flags; + bool err; + + if (!config_enabled(CONFIG_KFENCE_CHECK_DAG)) + return false; + + spin_lock_irqsave(&kfence_lock, flags); + err = __kfence_check_if_after(fence, signaler); + __kfence_clear_checked_bit(fence); + spin_unlock_irqrestore(&kfence_lock, flags); + + return err; +} + +static wait_queue_t *__kfence_create_wq(struct kfence *fence, gfp_t gfp) +{ + wait_queue_t *wq; + + wq = kmalloc(sizeof(*wq), gfp); + if (unlikely(!wq)) + return NULL; + + INIT_LIST_HEAD(&wq->task_list); + wq->flags = 0; + wq->func = kfence_wake; + wq->private = kfence_get(fence); + + kfence_await(fence); + + return wq; +} + +/** + 
* kfence_await_kfence - set one fence to wait upon another + * @fence: this kfence + * @signaler: target kfence to wait upon + * @gfp: the allowed allocation mask + * + * kfence_await_kfence() causes the @fence to asynchronously wait upon the + * completion of @signaler. + * + * Returns 1 if the @fence was added to the waitqueue of @signaler, 0 + * if @signaler was already complete, or a negative error code. + */ +int kfence_await_kfence(struct kfence *fence, + struct kfence *signaler, + gfp_t gfp) +{ + wait_queue_t *wq; + unsigned long flags; + int pending; + + if (kfence_done(signaler)) + return 0; + + /* The dependency graph must be acyclic. */ + if (unlikely(kfence_check_if_after(fence, signaler))) + return -EINVAL; + + wq = __kfence_create_wq(fence, gfp); + if (unlikely(!wq)) { + if (!gfpflags_allow_blocking(gfp)) + return -ENOMEM; + + kfence_wait(signaler); + return 0; + } + + spin_lock_irqsave(&signaler->wait.lock, flags); + if (likely(!kfence_done(signaler))) { + __add_wait_queue_tail(&signaler->wait, wq); + pending = 1; + } else { + kfence_wake(wq, 0, 0, NULL); + pending = 0; + } + spin_unlock_irqrestore(&signaler->wait.lock, flags); + + return pending; +} +EXPORT_SYMBOL_GPL(kfence_await_kfence); + +/** + * kfence_await_completion - set the fence to wait upon a completion + * @fence: this kfence + * @x: target completion to wait upon + * @gfp: the allowed allocation mask + * + * kfence_await_completion() causes the @fence to asynchronously wait upon + * the completion. + * + * Returns 1 if the @fence was added to the waitqueue of @x, 0 + * if @x was already complete, or a negative error code. 
+ */ +int kfence_await_completion(struct kfence *fence, + struct completion *x, + gfp_t gfp) +{ + wait_queue_t *wq; + unsigned long flags; + int pending; + + if (completion_done(x)) + return 0; + + wq = __kfence_create_wq(fence, gfp); + if (unlikely(!wq)) { + if (!gfpflags_allow_blocking(gfp)) + return -ENOMEM; + + wait_for_completion(x); + return 0; + } + + spin_lock_irqsave(&x->wait.lock, flags); + if (likely(!READ_ONCE(x->done))) { + __add_wait_queue_tail(&x->wait, wq); + pending = 1; + } else { + kfence_wake(wq, 0, 0, NULL); + pending = 0; + } + spin_unlock_irqrestore(&x->wait.lock, flags); + + return pending; +} +EXPORT_SYMBOL_GPL(kfence_await_completion); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b9cfdbfae9aa..df1182d41f06 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1763,6 +1763,29 @@ config KPROBES_SANITY_TEST
Say N if you are unsure.
+config KFENCE_SELFTEST + tristate "Kfence self tests" + depends on DEBUG_KERNEL + default n + help + This option provides a kernel module that can be used to test + the kfence handling. This option is not useful for distributions + or general kernels, but only for kernel developers working on the + kfence and async_domain facility. + + Say N if you are unsure. + +config KFENCE_CHECK_DAG + bool "Check that kfence are only used with directed acyclic graphs" + depends on DEBUG_KERNEL + default n + help + This option enforces that kfences are only used with directed acyclic + graphs (DAG), as otherwise the cycles in the graph mean that they + will never be signaled (or the corresponding task executed). + + Say N if you are unsure. + config BACKTRACE_SELF_TEST tristate "Self test for the backtrace code" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index ff6a7a6c6395..943781cfe8d1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,6 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o +obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_HAS_DMA) += dma-noop.o diff --git a/lib/test-kfence.c b/lib/test-kfence.c new file mode 100644 index 000000000000..b40719fce967 --- /dev/null +++ b/lib/test-kfence.c @@ -0,0 +1,536 @@ +/* + * Test cases for kfence facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/kfence.h> +#include <linux/module.h> +#include <linux/slab.h> + +static struct kfence *alloc_kfence(void) +{ + struct kfence *fence; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + kfence_init(fence, NULL); + return fence; +} + +static int __init __test_self(struct kfence *fence) +{ + if (kfence_done(fence)) + return -EINVAL; + + kfence_complete(fence); + if (!kfence_done(fence)) + return -EINVAL; + + kfence_wait(fence); + if (!kfence_done(fence)) + return -EINVAL; + + return 0; +} + +static int __init test_self(void) +{ + struct kfence *fence; + int ret; + + /* Test kfence signaling and completion testing */ + pr_debug("%s\n", __func__); + + fence = alloc_kfence(); + if (!fence) + return -ENOMEM; + + ret = __test_self(fence); + + kfence_put(fence); + return ret; +} + +struct test_stack { + struct kfence fence; + bool seen; +}; + +static int __init __kfence_call fence_callback(struct kfence *fence) +{ + container_of(fence, typeof(struct test_stack), fence)->seen = true; + return NOTIFY_DONE; +} + +static int __init test_stack(void) +{ + struct test_stack ts; + int ret; + + /* Test kfence signaling and completion testing (on stack) */ + pr_debug("%s\n", __func__); + + ts.seen = false; + kfence_init(&ts.fence, fence_callback); + + ret = __test_self(&ts.fence); + if (ret < 0) + return ret; + + if (!ts.seen) { + pr_err("fence callback not executed\n"); + return -EINVAL; + } + + return 0; +} + +static int __init test_dag(void) +{ + 
struct kfence *A, *B, *C; + + /* Test detection of cycles within the kfence graphs */ + pr_debug("%s\n", __func__); + + if (!config_enabled(CONFIG_KFENCE_CHECK_DAG)) + return 0; + + A = alloc_kfence(); + if (kfence_await_kfence(A, A, GFP_KERNEL) != -EINVAL) { + pr_err("recursive cycle not detected (AA)\n"); + return -EINVAL; + } + + B = alloc_kfence(); + + kfence_await_kfence(A, B, GFP_KERNEL); + if (kfence_await_kfence(B, A, GFP_KERNEL) != -EINVAL) { + pr_err("single depth cycle not detected (BAB)\n"); + return -EINVAL; + } + + C = alloc_kfence(); + kfence_await_kfence(B, C, GFP_KERNEL); + if (kfence_await_kfence(C, A, GFP_KERNEL) != -EINVAL) { + pr_err("cycle not detected (BA, CB, AC)\n"); + return -EINVAL; + } + + kfence_complete(A); + kfence_put(A); + + kfence_complete(B); + kfence_put(B); + + kfence_complete(C); + kfence_put(C); + + return 0; +} + +static int __init test_AB(void) +{ + struct kfence *A, *B; + int ret; + + /* Test kfence (A) waiting on an event source (B) */ + pr_debug("%s\n", __func__); + + A = alloc_kfence(); + B = alloc_kfence(); + if (!A || !B) + return -ENOMEM; + + ret = kfence_await_kfence(A, B, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(A); + if (kfence_done(A)) + return -EINVAL; + + kfence_complete(B); + if (!kfence_done(B)) + return -EINVAL; + + if (!kfence_done(A)) + return -EINVAL; + + kfence_put(B); + kfence_put(A); + return 0; +} + +static int __init test_ABC(void) +{ + struct kfence *A, *B, *C; + int ret; + + /* Test a chain of fences, A waits on B who waits on C */ + pr_debug("%s\n", __func__); + + A = alloc_kfence(); + B = alloc_kfence(); + C = alloc_kfence(); + if (!A || !B || !C) + return -ENOMEM; + + ret = kfence_await_kfence(A, B, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + ret = kfence_await_kfence(B, C, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(A); + if (kfence_done(A)) + return 
-EINVAL; + + kfence_complete(B); + if (kfence_done(B)) + return -EINVAL; + + if (kfence_done(A)) + return -EINVAL; + + kfence_complete(C); + if (!kfence_done(C)) + return -EINVAL; + + if (!kfence_done(B)) + return -EINVAL; + + if (!kfence_done(A)) + return -EINVAL; + + kfence_put(C); + kfence_put(B); + kfence_put(A); + return 0; +} + +static int __init test_AB_C(void) +{ + struct kfence *A, *B, *C; + int ret; + + /* Test multiple fences (AB) waiting on a single event (C) */ + pr_debug("%s\n", __func__); + + A = alloc_kfence(); + B = alloc_kfence(); + C = alloc_kfence(); + if (!A || !B || !C) + return -ENOMEM; + + ret = kfence_await_kfence(A, C, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + ret = kfence_await_kfence(B, C, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(A); + kfence_complete(B); + + if (kfence_done(A)) + return -EINVAL; + + if (kfence_done(B)) + return -EINVAL; + + kfence_complete(C); + if (!kfence_done(C)) + return -EINVAL; + + if (!kfence_done(B)) + return -EINVAL; + + if (!kfence_done(A)) + return -EINVAL; + + kfence_put(C); + kfence_put(B); + kfence_put(A); + return 0; +} + +static int __init test_C_AB(void) +{ + struct kfence *A, *B, *C; + int ret; + + /* Test multiple event sources (A,B) for a single fence (C) */ + pr_debug("%s\n", __func__); + + A = alloc_kfence(); + B = alloc_kfence(); + C = alloc_kfence(); + if (!A || !B || !C) + return -ENOMEM; + + ret = kfence_await_kfence(C, A, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + ret = kfence_await_kfence(C, B, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(C); + if (kfence_done(C)) + return -EINVAL; + + kfence_complete(A); + kfence_complete(B); + + if (!kfence_done(A)) + return -EINVAL; + + if (!kfence_done(B)) + return -EINVAL; + + if (!kfence_done(C)) + return -EINVAL; + + kfence_put(C); + kfence_put(B); + kfence_put(A); + return 
0; +} + +static int __init test_completion(void) +{ + struct kfence *fence; + struct completion x; + int ret; + + /* Test use of a completion as an event source for kfences */ + pr_debug("%s\n", __func__); + + init_completion(&x); + + fence = alloc_kfence(); + if (!fence) + return -ENOMEM; + + ret = kfence_await_completion(fence, &x, GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(fence); + if (kfence_done(fence)) + return -EINVAL; + + complete_all(&x); + if (!kfence_done(fence)) + return -EINVAL; + + kfence_put(fence); + return 0; +} + +struct task_ipc { + struct work_struct work; + struct completion started; + struct kfence *in, *out; + int value; +}; + +static void __init task_ipc(struct work_struct *work) +{ + struct task_ipc *ipc = container_of(work, typeof(*ipc), work); + + complete(&ipc->started); + + kfence_wait(ipc->in); + smp_store_mb(ipc->value, 1); + kfence_complete(ipc->out); +} + +static int __init test_chain(void) +{ + const int nfences = 4096; + struct kfence **fences; + int ret, i; + + /* Test a long chain of fences */ + pr_debug("%s\n", __func__); + + fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + for (i = 0; i < nfences; i++) { + fences[i] = alloc_kfence(); + if (!fences[i]) + return -ENOMEM; + + if (i > 0) { + ret = kfence_await_kfence(fences[i], + fences[i - 1], + GFP_KERNEL); + if (ret < 0) + return ret; + } + } + + for (i = nfences; --i; ) { + kfence_complete(fences[i]); + if (kfence_done(fences[i])) + return -EINVAL; + } + + kfence_complete(fences[0]); + for (i = 0; i < nfences; i++) { + if (!kfence_done(fences[i])) + return -EINVAL; + + kfence_put(fences[i]); + } + kfree(fences); + return 0; +} + +static int __init test_ipc(void) +{ + struct task_ipc ipc; + int ret = 0; + + /* Test use of kfence as an interprocess signaling mechanism */ + pr_debug("%s\n", __func__); + + ipc.in = alloc_kfence(); + ipc.out = alloc_kfence(); + if (!ipc.in || 
!ipc.out) + return -ENOMEM; + + /* use a completion to avoid chicken-and-egg testing for kfence */ + init_completion(&ipc.started); + + ipc.value = 0; + INIT_WORK(&ipc.work, task_ipc); + schedule_work(&ipc.work); + + wait_for_completion(&ipc.started); + + usleep_range(1000, 2000); + if (READ_ONCE(ipc.value)) { + pr_err("worker updated value before kfence was signaled\n"); + ret = -EINVAL; + } + + kfence_complete(ipc.in); + kfence_wait(ipc.out); + + if (!READ_ONCE(ipc.value)) { + pr_err("worker signaled kfence before value was posted\n"); + ret = -EINVAL; + } + + flush_work(&ipc.work); + kfence_put(ipc.in); + kfence_put(ipc.out); + return ret; +} + +static int __init test_kfence_init(void) +{ + int ret; + + pr_info("Testing kfences\n"); + + ret = test_self(); + if (ret < 0) { + pr_err("self failed\n"); + return ret; + } + + ret = test_stack(); + if (ret < 0) { + pr_err("stack failed\n"); + return ret; + } + + ret = test_dag(); + if (ret < 0) { + pr_err("DAG checker failed\n"); + return ret; + } + + ret = test_AB(); + if (ret < 0) { + pr_err("AB failed\n"); + return ret; + } + + ret = test_ABC(); + if (ret < 0) { + pr_err("ABC failed\n"); + return ret; + } + + ret = test_AB_C(); + if (ret < 0) { + pr_err("AB_C failed\n"); + return ret; + } + + ret = test_C_AB(); + if (ret < 0) { + pr_err("C_AB failed\n"); + return ret; + } + + ret = test_chain(); + if (ret < 0) { + pr_err("chain failed\n"); + return ret; + } + + ret = test_ipc(); + if (ret < 0) { + pr_err("ipc failed\n"); + return ret; + } + + ret = test_completion(); + if (ret < 0) { + pr_err("completion failed\n"); + return ret; + } + + return 0; +} + +static void __exit test_kfence_cleanup(void) +{ +} + +module_init(test_kfence_init); +module_exit(test_kfence_cleanup); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/lib/kfence.sh b/tools/testing/selftests/lib/kfence.sh new file mode 100755 index 000000000000..487320c70ed1 --- /dev/null +++ 
b/tools/testing/selftests/lib/kfence.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs infrastructure tests using test-kfence kernel module + +if /sbin/modprobe -q test-kfence; then + /sbin/modprobe -q -r test-kfence + echo "kfence: ok" +else + echo "kfence: [FAIL]" + exit 1 +fi
A common requirement when scheduling a task is that it should not begin until a certain point in time has passed (e.g. queue_delayed_work()). kfence_await_hrtimer() causes the kfence to asynchronously wait until after the appropriate time before being woken.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- include/linux/kfence.h | 5 +++++ kernel/kfence.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/test-kfence.c | 44 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+)
diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 6e32385b3b8c..76a2f95dfb70 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -16,6 +16,7 @@ #include <linux/wait.h>
struct completion; +enum hrtimer_mode;
struct kfence { wait_queue_head_t wait; @@ -43,6 +44,10 @@ int kfence_await_kfence(struct kfence *fence, int kfence_await_completion(struct kfence *fence, struct completion *x, gfp_t gfp); +int kfence_await_hrtimer(struct kfence *fence, + clockid_t clock, enum hrtimer_mode mode, + ktime_t delay, u64 slack, + gfp_t gfp); void kfence_complete(struct kfence *fence); void kfence_wake_up_all(struct kfence *fence); void kfence_wait(struct kfence *fence); diff --git a/kernel/kfence.c b/kernel/kfence.c index 693af9da545a..59c27910a749 100644 --- a/kernel/kfence.c +++ b/kernel/kfence.c @@ -48,6 +48,9 @@ * - kfence_await_completion(): the kfence asynchronously waits upon a * completion * + * - kfence_await_hrtimer(): the kfence asynchronously wait for an expiration + * of a timer + * * A kfence is initialised using kfence_init(), and starts off awaiting an * event. Once you have finished setting up the fence, including adding * all of its asynchronous waits, call kfence_complete(). @@ -429,3 +432,58 @@ int kfence_await_completion(struct kfence *fence, return pending; } EXPORT_SYMBOL_GPL(kfence_await_completion); + +struct timer_cb { + struct hrtimer timer; + struct kfence *fence; +}; + +static enum hrtimer_restart +timer_kfence_wake(struct hrtimer *timer) +{ + struct timer_cb *cb = container_of(timer, typeof(*cb), timer); + + kfence_complete(cb->fence); + kfence_put(cb->fence); + kfree(cb); + + return HRTIMER_NORESTART; +} + +/** + * kfence_await_hrtimer - set the fence to wait for a period of time + * @fence: this kfence + * @clock: which clock to program + * @mode: delay given as relative or absolute + * @delay: how long or until what time to wait + * @slack: how much slack that may be applied to the delay + * + * kfence_await_hrtimer() causes the @fence to wait for a a period of time, or + * until a certain point in time. It is a convenience wrapper around + * hrtimer_start_range_ns(). 
For more details on @clock, @mode, @delay and + * @slack please consult the hrtimer documentation. + * + * Returns 1 if the delay was successfully added to the @fence, or a negative + * error code on failure. + */ +int kfence_await_hrtimer(struct kfence *fence, + clockid_t clock, enum hrtimer_mode mode, + ktime_t delay, u64 slack, + gfp_t gfp) +{ + struct timer_cb *cb; + + cb = kmalloc(sizeof(*cb), gfp); + if (!cb) + return -ENOMEM; + + cb->fence = kfence_get(fence); + kfence_await(fence); + + hrtimer_init(&cb->timer, clock, mode); + cb->timer.function = timer_kfence_wake; + + hrtimer_start_range_ns(&cb->timer, delay, slack, mode); + return 1; +} +EXPORT_SYMBOL_GPL(kfence_await_hrtimer); diff --git a/lib/test-kfence.c b/lib/test-kfence.c index b40719fce967..1b0853fda7c3 100644 --- a/lib/test-kfence.c +++ b/lib/test-kfence.c @@ -352,6 +352,44 @@ static int __init test_completion(void) return 0; }
+static int __init test_delay(void) +{ + struct kfence *fence; + ktime_t delay; + int ret; + + /* Test use of a hrtimer as an event source for kfences */ + pr_debug("%s\n", __func__); + + fence = alloc_kfence(); + if (!fence) + return -ENOMEM; + + delay = ktime_get(); + + ret = kfence_await_hrtimer(fence, CLOCK_MONOTONIC, HRTIMER_MODE_REL, + ms_to_ktime(1), 1 << 10, + GFP_KERNEL); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + kfence_complete(fence); + kfence_wait(fence); + + delay = ktime_sub(ktime_get(), delay); + kfence_put(fence); + + if (!ktime_to_ms(delay)) { + pr_err("kfence woke too early, delay was only %lldns\n", + (long long)ktime_to_ns(delay)); + return -EINVAL; + } + + return 0; +} + struct task_ipc { struct work_struct work; struct completion started; @@ -522,6 +560,12 @@ static int __init test_kfence_init(void) return ret; }
+ ret = test_delay(); + if (ret < 0) { + pr_err("delay failed\n"); + return ret; + } + return 0; }
dma-buf provides interfaces for receiving notifications from DMA hardware, and for implicitly tracking fences used for rendering into a dma-buf. We want to be able to use these event sources along with kfence for easy collection and combining with other events.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- drivers/dma-buf/fence.c | 58 +++++++++++++++++++++++++++++++++++++++++++ drivers/dma-buf/reservation.c | 48 +++++++++++++++++++++++++++++++++++ include/linux/fence.h | 6 +++++ include/linux/kfence.h | 2 ++ include/linux/reservation.h | 7 ++++++ kernel/kfence.c | 8 ++++++ 6 files changed, 129 insertions(+)
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 7b05dbe9b296..3f06b3b1b4cc 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -22,6 +22,7 @@ #include <linux/export.h> #include <linux/atomic.h> #include <linux/fence.h> +#include <linux/kfence.h>
#define CREATE_TRACE_POINTS #include <trace/events/fence.h> @@ -530,3 +531,60 @@ fence_init(struct fence *fence, const struct fence_ops *ops, trace_fence_init(fence); } EXPORT_SYMBOL(fence_init); + +struct dma_fence_cb { + struct fence_cb base; + struct kfence *fence; +}; + +static void dma_kfence_wake(struct fence *dma, struct fence_cb *data) +{ + struct dma_fence_cb *cb = container_of(data, typeof(*cb), base); + + kfence_complete(cb->fence); + kfence_put(cb->fence); + kfree(cb); +} + +/** + * kfence_await_dma_fence - set the fence to wait upon a DMA fence + * @fence: this kfence + * @dma: target DMA fence to wait upon + * @gfp: the allowed allocation type + * + * kfence_await_dma_fence() causes the @fence to wait upon completion of a DMA fence. + * + * Returns 1 if the @fence was successfully added to the waitqueue of @dma, 0 + * if @dma was already signaled (and so not added), or a negative error code. + */ +int kfence_await_dma_fence(struct kfence *fence, struct fence *dma, gfp_t gfp) +{ + struct dma_fence_cb *cb; + int ret; + + if (fence_is_signaled(dma)) + return 0; + + cb = kmalloc(sizeof(*cb), gfp); + if (!cb) { + if (!gfpflags_allow_blocking(gfp)) + return -ENOMEM; + + return fence_wait(dma, false); + } + + cb->fence = kfence_get(fence); + kfence_await(fence); + + ret = fence_add_callback(dma, &cb->base, dma_kfence_wake); + if (ret == 0) { + ret = 1; + } else { + dma_kfence_wake(dma, &cb->base); + if (ret == -ENOENT) /* fence already signaled */ + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL_GPL(kfence_await_dma_fence); diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 9566a62ad8e3..138b792af0c3 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -543,3 +543,51 @@ unlock_retry: goto retry; } EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu); + +/** + * kfence_await_reservation - set the fence to wait upon a reservation_object + * @fence: this kfence + * @resv: target reservation_object (collection of DMA 
fences) to wait upon + * @write: Wait for read or read/write access + * @gfp: the allowed allocation type + * + * kfence_await_reservation() causes the @fence to wait upon completion of the + * reservation object (a collection of DMA fences), either for read access + * or for read/write access. + * + * Returns 1 if the @fence was successfully added to the waitqueues of @resv, 0 + * if @resv was already signaled (and so not added), or a negative error code. + */ +int kfence_await_reservation(struct kfence *fence, + struct reservation_object *resv, + bool write, + gfp_t gfp) +{ + struct fence *excl, **shared; + unsigned int count, i; + int ret; + + ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared); + if (ret) + return ret; + + if (write) { + for (i = 0; i < count; i++) { + ret |= kfence_await_dma_fence(fence, shared[i], gfp); + if (ret < 0) + goto out; + } + } + + if (excl) + ret |= kfence_await_dma_fence(fence, excl, gfp); + +out: + fence_put(excl); + for (i = 0; i < count; i++) + fence_put(shared[i]); + kfree(shared); + + return ret; +} +EXPORT_SYMBOL_GPL(kfence_await_reservation); diff --git a/include/linux/fence.h b/include/linux/fence.h index 2056e9fd0138..3c3bc318e826 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -34,6 +34,8 @@ struct fence; struct fence_ops; struct fence_cb;
+struct kfence; + /** * struct fence - software synchronization primitive * @refcount: refcount for this fence @@ -378,4 +380,8 @@ unsigned fence_context_alloc(unsigned num); ##args); \ } while (0)
+int kfence_await_dma_fence(struct kfence *fence, + struct fence *dma, + gfp_t gfp); + #endif /* __LINUX_FENCE_H */ diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 76a2f95dfb70..acbfc2ea7c49 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -16,6 +16,8 @@ #include <linux/wait.h>
struct completion; +struct fence; +struct reservation_object; enum hrtimer_mode;
struct kfence { diff --git a/include/linux/reservation.h b/include/linux/reservation.h index b0f305e77b7f..1954bab95db9 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -49,6 +49,8 @@ extern struct ww_class reservation_ww_class; extern struct lock_class_key reservation_seqcount_class; extern const char reservation_seqcount_string[];
+struct kfence; + /** * struct reservation_object_list - a list of shared fences * @rcu: for internal use @@ -210,4 +212,9 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool reservation_object_test_signaled_rcu(struct reservation_object *obj, bool test_all);
+int kfence_await_reservation(struct kfence *fence, + struct reservation_object *resv, + bool write, + gfp_t gfp); + #endif /* _LINUX_RESERVATION_H */ diff --git a/kernel/kfence.c b/kernel/kfence.c index 59c27910a749..4605eabc2c1b 100644 --- a/kernel/kfence.c +++ b/kernel/kfence.c @@ -7,7 +7,9 @@ * of the License. */
+#include <linux/fence.h> #include <linux/kfence.h> +#include <linux/reservation.h> #include <linux/slab.h>
/** @@ -51,6 +53,12 @@ * - kfence_await_hrtimer(): the kfence asynchronously wait for an expiration * of a timer * + * - kfence_await_dma_fence(): the kfence asynchronously waits for a DMA + * (hardware signaled) fence + * + * - kfence_await_reservation(): the kfence asynchronously waits for a DMA + * reservation object + * * A kfence is initialised using kfence_init(), and starts off awaiting an * event. Once you have finished setting up the fence, including adding * all of its asynchronous waits, call kfence_complete().
A preparatory patch for adding new features (and their tests). First we want to add coverage of existing features to kselftest.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk --- lib/Kconfig.debug | 9 ++ lib/Makefile | 1 + lib/test-async-domain.c | 131 ++++++++++++++++++++++++++++ tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/async-domain.sh | 10 +++ 5 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 lib/test-async-domain.c create mode 100755 tools/testing/selftests/lib/async-domain.sh
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index df1182d41f06..4b180aed88b6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1784,6 +1784,15 @@ config KFENCE_CHECK_DAG graphs (DAG), as otherwise the cycles in the graph means that they will never be signaled (or the corresponding task executed).
+config ASYNC_DOMAIN_SELFTEST + tristate "Asynchronous domain self tests" + depends on DEBUG_KERNEL + default n + help + Self tests for asynchronous task execution. This option is not useful for + distributions or general kernels, but only for kernel developers + working on the async_domain facility. + Say N if you are unsure.
config BACKTRACE_SELF_TEST diff --git a/lib/Makefile b/lib/Makefile index 943781cfe8d1..5864053cf63e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,6 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o +obj-$(CONFIG_ASYNC_DOMAIN_SELFTEST) += test-async-domain.o obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/test-async-domain.c b/lib/test-async-domain.c new file mode 100644 index 000000000000..558a71414fb6 --- /dev/null +++ b/lib/test-async-domain.c @@ -0,0 +1,131 @@ +/* + * Test cases for async-domain facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/async.h> +#include <linux/module.h> +#include <linux/delay.h> + +static void task_A(void *data, async_cookie_t cookie) +{ + long *result = data; + smp_store_mb(*result, 'A'); +} + +static void task_B(void *data, async_cookie_t cookie) +{ + long *result = data; + usleep_range(100, 200); + smp_store_mb(*result, 'B'); +} + +static int __init test_implicit(struct async_domain *domain) +{ + const long expected = 'B'; + long result = 0; + + if (!async_schedule_domain(task_B, &result, domain)) + return -ENOMEM; + + async_synchronize_full_domain(domain); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + __func__, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + +static int __init test_registered(struct async_domain *domain) +{ + const long expected = 'B'; + long result = 0; + + if (!async_schedule_domain(task_B, &result, domain)) + return -ENOMEM; + + async_synchronize_full(); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + __func__, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + +static void task_nop(void *data, async_cookie_t cookie) +{ + async_cookie_t *result = data; + smp_store_mb(*result, cookie); +} + +static int __init perf_nop(int batch, long timeout_us) +{ + ktime_t start; + async_cookie_t nop, last; + long count, delay; + + count = 0; + nop = last = 0; + start = ktime_get(); + do { + ktime_t delta; + int n; + 
+ for (n = 0; n < batch; n++) + last = async_schedule(task_nop, &nop); + async_synchronize_full(); + delta = ktime_sub(ktime_get(), start); + delay = ktime_to_ns(delta) >> 10; + count += batch; + } while (delay < timeout_us); + + pr_info("%ld nop tasks (batches of %d) completed in %ldus; last queued %lld, saw %lld last\n", + count, batch, delay, + (long long)last, (long long)READ_ONCE(nop)); + return 0; +} + +static int __init test_async_domain_init(void) +{ + ASYNC_DOMAIN(domain); + int ret; + + pr_info("Testing async-domains\n"); + + ret = test_implicit(&domain); + if (ret) + return ret; + + ret = test_registered(&domain); + if (ret) + return ret; + + ret = perf_nop(1, 100); + if (ret) + return ret; + + ret = perf_nop(128, 1000); + if (ret) + return ret; + + async_unregister_domain(&domain); + return 0; +} + +static void __exit test_async_domain_cleanup(void) +{ + async_synchronize_full(); +} + +module_init(test_async_domain_init); +module_exit(test_async_domain_cleanup); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 08360060ab14..46a77ac5b4c6 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all:
-TEST_PROGS := printf.sh bitmap.sh +TEST_PROGS := printf.sh bitmap.sh async-domain.sh
include ../lib.mk diff --git a/tools/testing/selftests/lib/async-domain.sh b/tools/testing/selftests/lib/async-domain.sh new file mode 100755 index 000000000000..22c270051de7 --- /dev/null +++ b/tools/testing/selftests/lib/async-domain.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs infrastructure tests using test-async-domain kernel module + +if /sbin/modprobe -q test-async-domain; then + /sbin/modprobe -q -r test-async-domain + echo "async-domain: ok" +else + echo "async-domain: [FAIL]" + exit 1 +fi
The current async-domain model supports running a multitude of independent tasks with a coarse synchronisation point. This is sufficient for its original purpose of allowing independent drivers to run concurrently during various phases (booting, early resume, late resume etc), and keep the asynchronous domain out of the synchronous kernel domains. However, for greater exploitation, drivers themselves want to schedule multiple tasks within a phase (or between phases) and control the order of execution within those tasks relative to each other. To enable this, we extend the synchronisation scheme based upon kfences and back every task with one. Any task may now wait upon the kfence before being scheduled, and equally the kfence may be used to wait on the task itself (rather than waiting on the cookie for all previous tasks to be completed).
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- include/linux/async.h | 60 ++++++++- kernel/async.c | 234 ++++++++++++++++++++-------------- lib/test-async-domain.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 515 insertions(+), 103 deletions(-)
diff --git a/include/linux/async.h b/include/linux/async.h index 6b0226bdaadc..e7d7289a9889 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -13,38 +13,88 @@ #define __ASYNC_H__
#include <linux/types.h> +#include <linux/kfence.h> #include <linux/list.h>
typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); + +struct async_work { + struct kfence fence; + /* private */ +}; + struct async_domain { struct list_head pending; unsigned registered:1; };
+#define ASYNC_DOMAIN_INIT(_name, _r) { \ + .pending = LIST_HEAD_INIT(_name.pending), \ + .registered = _r \ +} + /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ - struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ - .registered = 1 } + struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 1)
/* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ - struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ - .registered = 0 } + struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 0) + +extern void init_async_domain(struct async_domain *domain, bool registered);
extern async_cookie_t async_schedule(async_func_t func, void *data); extern async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain); -void async_unregister_domain(struct async_domain *domain); +extern void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); + extern bool current_is_async(void); + +extern struct async_work * +async_work_create(async_func_t func, void *data, gfp_t gfp); + +static inline struct async_work *async_work_get(struct async_work *work) +{ + kfence_get(&work->fence); + return work; +} + +static inline int +async_work_after(struct async_work *work, struct kfence *fence) +{ + return kfence_await_kfence(&work->fence, fence, GFP_KERNEL); +} + +static inline int +async_work_before(struct async_work *work, struct kfence *fence) +{ + return kfence_await_kfence(fence, &work->fence, GFP_KERNEL); +} + +static inline void async_work_wait(struct async_work *work) +{ + kfence_wait(&work->fence); +} + +static inline void async_work_put(struct async_work *work) +{ + kfence_put(&work->fence); +} + +extern async_cookie_t queue_async_work(struct async_domain *domain, + struct async_work *work, + gfp_t gfp); +extern async_cookie_t schedule_async_work(struct async_work *work); + #endif diff --git a/kernel/async.c b/kernel/async.c index d2edd6efec56..0d695919a60d 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -2,6 +2,7 @@ * async.c: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation + * (C) Copyright 2016 Intel Corporation * Author: Arjan van de Ven arjan@linux.intel.com * * This program is free software; you can redistribute it and/or @@ -59,59 +60,39 @@ asynchronous and synchronous parts of the kernel.
#include "workqueue_internal.h"
-static async_cookie_t next_cookie = 1; - +#define ASYNC_QUEUED_BIT KFENCE_PRIVATE_BIT #define MAX_WORK 32768 -#define ASYNC_COOKIE_MAX ULLONG_MAX /* infinity cookie */ - -static LIST_HEAD(async_global_pending); /* pending from all registered doms */ -static ASYNC_DOMAIN(async_dfl_domain); -static DEFINE_SPINLOCK(async_lock);
struct async_entry { - struct list_head domain_list; - struct list_head global_list; - struct work_struct work; - async_cookie_t cookie; - async_func_t func; - void *data; - struct async_domain *domain; -}; - -static DECLARE_WAIT_QUEUE_HEAD(async_done); + struct async_work base; + struct work_struct work;
-static atomic_t entry_count; + struct list_head pending_link[2];
-static async_cookie_t lowest_in_progress(struct async_domain *domain) -{ - struct list_head *pending; - async_cookie_t ret = ASYNC_COOKIE_MAX; - unsigned long flags; + async_cookie_t cookie; + async_func_t func; + void *data; +};
- spin_lock_irqsave(&async_lock, flags); +static LIST_HEAD(async_global_pending); /* pending from all registered doms */ +static ASYNC_DOMAIN(async_dfl_domain); +static DEFINE_SPINLOCK(async_lock); +static unsigned int async_pending_count;
- if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; +static async_cookie_t assign_cookie(void) +{ + static async_cookie_t next_cookie;
- if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (++next_cookie == 0) + next_cookie = 1;
- spin_unlock_irqrestore(&async_lock, flags); - return ret; + return next_cookie; }
-/* - * pick the first pending entry and run it - */ static void async_run_entry_fn(struct work_struct *work) { - struct async_entry *entry = - container_of(work, struct async_entry, work); - unsigned long flags; - ktime_t uninitialized_var(calltime), delta, rettime; + struct async_entry *entry = container_of(work, typeof(*entry), work); + ktime_t uninitialized_var(calltime);
/* 1) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { @@ -122,8 +103,7 @@ static void async_run_entry_fn(struct work_struct *work) } entry->func(entry->data, entry->cookie); if (initcall_debug && system_state == SYSTEM_BOOTING) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); + ktime_t delta = ktime_sub(ktime_get(), calltime); pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n", (long long)entry->cookie, entry->func, @@ -131,69 +111,81 @@ static void async_run_entry_fn(struct work_struct *work) }
/* 2) remove self from the pending queues */ - spin_lock_irqsave(&async_lock, flags); - list_del_init(&entry->domain_list); - list_del_init(&entry->global_list); + spin_lock_irq(&async_lock); + list_del(&entry->pending_link[0]); + list_del(&entry->pending_link[1]); + async_pending_count--; + spin_unlock_irq(&async_lock);
- /* 3) free the entry */ - kfree(entry); - atomic_dec(&entry_count); + /* 3) wake up any waiters */ + kfence_wake_up_all(&entry->base.fence); + kfence_put(&entry->base.fence); +}
- spin_unlock_irqrestore(&async_lock, flags); +__kfence_call static int async_work_notify(struct kfence *fence) +{ + struct async_entry *entry = + container_of(fence, typeof(*entry), base.fence); + + if (kfence_done(fence)) { + kfree(entry); + return NOTIFY_DONE; + }
- /* 4) wake up any waiters */ - wake_up(&async_done); + queue_work(system_unbound_wq, &entry->work); + return NOTIFY_OK; }
-static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain) +struct async_work *async_work_create(async_func_t func, void *data, gfp_t gfp) { struct async_entry *entry; - unsigned long flags; - async_cookie_t newcookie;
- /* allow irq-off callers */ - entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), gfp); + if (!entry) + return NULL;
- /* - * If we're out of memory or if there's too much work - * pending already, we execute synchronously. - */ - if (!entry || atomic_read(&entry_count) > MAX_WORK) { - kfree(entry); - spin_lock_irqsave(&async_lock, flags); - newcookie = next_cookie++; - spin_unlock_irqrestore(&async_lock, flags); + kfence_init(&entry->base.fence, async_work_notify);
- /* low on memory.. run synchronously */ - func(data, newcookie); - return newcookie; - } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); INIT_WORK(&entry->work, async_run_entry_fn); entry->func = func; entry->data = data; - entry->domain = domain;
- spin_lock_irqsave(&async_lock, flags); + return &entry->base; +} +EXPORT_SYMBOL_GPL(async_work_create);
- /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; +async_cookie_t queue_async_work(struct async_domain *domain, + struct async_work *work, + gfp_t gfp) +{ + struct async_entry *entry = container_of(work, typeof(*entry), base); + unsigned long flags;
- list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); + if (WARN_ON(test_and_set_bit(ASYNC_QUEUED_BIT, + &entry->base.fence.flags))) + return 0;
- atomic_inc(&entry_count); + spin_lock_irqsave(&async_lock, flags); + entry->cookie = assign_cookie(); + list_add_tail(&entry->pending_link[0], &domain->pending); + INIT_LIST_HEAD(&entry->pending_link[1]); + if (domain->registered) + list_add_tail(&entry->pending_link[1], &async_global_pending); + async_pending_count++; spin_unlock_irqrestore(&async_lock, flags);
/* mark that this task has queued an async job, used by module init */ current->flags |= PF_USED_ASYNC;
- /* schedule for execution */ - queue_work(system_unbound_wq, &entry->work); + kfence_complete(kfence_get(&entry->base.fence)); + + return entry->cookie; +} +EXPORT_SYMBOL_GPL(queue_async_work);
- return newcookie; +async_cookie_t schedule_async_work(struct async_work *work) +{ + return queue_async_work(&async_dfl_domain, work, GFP_KERNEL); }
/** @@ -206,7 +198,7 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy */ async_cookie_t async_schedule(async_func_t func, void *data) { - return __async_schedule(func, data, &async_dfl_domain); + return async_schedule_domain(func, data, &async_dfl_domain); } EXPORT_SYMBOL_GPL(async_schedule);
@@ -225,7 +217,27 @@ EXPORT_SYMBOL_GPL(async_schedule); async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { - return __async_schedule(func, data, domain); + struct async_work *work; + async_cookie_t cookie = 0; + + work = NULL; + if (READ_ONCE(async_pending_count) < MAX_WORK) + work = async_work_create(func, data, GFP_ATOMIC); + if (work) { + cookie = queue_async_work(domain, work, GFP_ATOMIC); + async_work_put(work); + } + if (!cookie) { + unsigned long flags; + + spin_lock_irqsave(&async_lock, flags); + cookie = assign_cookie(); + spin_unlock_irqrestore(&async_lock, flags); + + func(data, cookie); + } + + return cookie; } EXPORT_SYMBOL_GPL(async_schedule_domain);
@@ -251,10 +263,8 @@ EXPORT_SYMBOL_GPL(async_synchronize_full); */ void async_unregister_domain(struct async_domain *domain) { - spin_lock_irq(&async_lock); - WARN_ON(!domain->registered || !list_empty(&domain->pending)); + WARN_ON(!list_empty(&domain->pending)); domain->registered = 0; - spin_unlock_irq(&async_lock); } EXPORT_SYMBOL_GPL(async_unregister_domain);
@@ -267,7 +277,7 @@ EXPORT_SYMBOL_GPL(async_unregister_domain); */ void async_synchronize_full_domain(struct async_domain *domain) { - async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain); + async_synchronize_cookie_domain(0, domain); } EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
@@ -282,19 +292,49 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); */ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain) { - ktime_t uninitialized_var(starttime), delta, endtime; + ktime_t uninitialized_var(starttime); + struct list_head *pending; + + pending = domain ? &domain->pending : &async_global_pending;
if (initcall_debug && system_state == SYSTEM_BOOTING) { pr_debug("async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); }
- wait_event(async_done, lowest_in_progress(domain) >= cookie); + do { + struct kfence *fence = NULL; + unsigned long flags;
- if (initcall_debug && system_state == SYSTEM_BOOTING) { - endtime = ktime_get(); - delta = ktime_sub(endtime, starttime); + spin_lock_irqsave(&async_lock, flags); + if (!list_empty(pending)) { + struct async_entry *entry; + + if (cookie) { + entry = list_first_entry(pending, + struct async_entry, + pending_link[!domain]); + if ((s64)(cookie - entry->cookie) > 0) + fence = kfence_get(&entry->base.fence); + } else { + entry = list_last_entry(pending, + struct async_entry, + pending_link[!domain]); + cookie = entry->cookie; + fence = kfence_get(&entry->base.fence); + } + } + spin_unlock_irqrestore(&async_lock, flags); + + if (!fence) + break; + + kfence_wait(fence); + kfence_put(fence); + } while (1);
+ if (initcall_debug && system_state == SYSTEM_BOOTING) { + ktime_t delta = ktime_sub(ktime_get(), starttime); pr_debug("async_continuing @ %i after %lli usec\n", task_pid_nr(current), (long long)ktime_to_ns(delta) >> 10); @@ -327,3 +367,11 @@ bool current_is_async(void) return worker && worker->current_func == async_run_entry_fn; } EXPORT_SYMBOL_GPL(current_is_async); + +void init_async_domain(struct async_domain *domain, bool registered) +{ + memset(domain, 0, sizeof(*domain)); + INIT_LIST_HEAD(&domain->pending); + domain->registered = registered; +} +EXPORT_SYMBOL_GPL(init_async_domain); diff --git a/lib/test-async-domain.c b/lib/test-async-domain.c index 558a71414fb6..ecbeba9cd65b 100644 --- a/lib/test-async-domain.c +++ b/lib/test-async-domain.c @@ -7,6 +7,19 @@ #include <linux/async.h> #include <linux/module.h> #include <linux/delay.h> +#include <linux/slab.h> + +static struct kfence *alloc_kfence(void) +{ + struct kfence *fence; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + kfence_init(fence, NULL); + return fence; +}
static void task_A(void *data, async_cookie_t cookie) { @@ -21,6 +34,269 @@ static void task_B(void *data, async_cookie_t cookie) smp_store_mb(*result, 'B'); }
+static int __init test_x(const char *name, + struct async_domain *domain, + async_func_t func, + const long expected) +{ + struct async_work *A; + long result = 0; + + A = async_work_create(func, &result, GFP_KERNEL); + if (!A) + return -ENOMEM; + + queue_async_work(domain, A, GFP_KERNEL); + async_work_wait(A); + async_work_put(A); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + name, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + +static int __init test_A(struct async_domain *domain) +{ + return test_x(__func__, domain, task_A, 'A'); +} + +static int __init test_B(struct async_domain *domain) +{ + return test_x(__func__, domain, task_B, 'B'); +} + +static int __init test_x_fence(const char *name, + struct async_domain *domain, + async_func_t func, + const long expected) +{ + struct async_work *A; + struct kfence *fence; + long result = 0; + + A = async_work_create(func, &result, GFP_KERNEL); + if (!A) + return -ENOMEM; + + fence = alloc_kfence(); + if (!fence) + return -ENOMEM; + + queue_async_work(domain, A, GFP_KERNEL); + + kfence_await_kfence(fence, &A->fence, GFP_KERNEL); + kfence_complete(fence); + + kfence_wait(fence); + + async_work_put(A); + kfence_put(fence); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + name, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + +static int __init test_A_fence(struct async_domain *domain) +{ + return test_x_fence(__func__, domain, task_A, 'A'); +} + +static int __init test_B_fence(struct async_domain *domain) +{ + return test_x_fence(__func__, domain, task_B, 'B'); +} + +static int __init test_x_fence_y(const char *name, + struct async_domain *domain, + async_func_t x, + async_func_t y, + const long expected) +{ + struct async_work *A, *B; + struct kfence *fence; + long result = 0; + + A = async_work_create(x, &result, GFP_KERNEL); + if (!A) + return -ENOMEM; + + B = async_work_create(y, 
&result, GFP_KERNEL); + if (!B) + return -ENOMEM; + + fence = alloc_kfence(); + if (!fence) + return -ENOMEM; + + kfence_await_kfence(fence, &A->fence, GFP_KERNEL); + kfence_complete(fence); + + queue_async_work(domain, A, GFP_KERNEL); + async_work_put(A); + + async_work_after(B, fence); + queue_async_work(domain, B, GFP_KERNEL); + kfence_put(fence); + + async_work_wait(B); + async_work_put(B); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + name, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + +static int __init test_A_fence_B(struct async_domain *domain) +{ + return test_x_fence_y(__func__, domain, task_A, task_B, 'B'); +} + +static int __init test_B_fence_A(struct async_domain *domain) +{ + return test_x_fence_y(__func__, domain, task_B, task_A, 'A'); +} + +struct long_context { + struct kfence *barrier; + long *src; + long result; +}; + +static void task_wait(void *data, async_cookie_t cookie) +{ + struct long_context *ctx = data; + + kfence_wait(ctx->barrier); + smp_store_mb(ctx->result, READ_ONCE(*ctx->src)); +} + +static int __init test_pause(struct async_domain *domain) +{ + struct long_context ctx; + struct async_work *A, *B; + const long expected = 'B'; + long out_B = 'A'; + + ctx.result = 0; + ctx.src = &out_B; + + A = async_work_create(task_wait, &ctx, GFP_KERNEL); + if (!A) + return -ENOMEM; + + B = async_work_create(task_B, &out_B, GFP_KERNEL); + if (!B) + return -ENOMEM; + + ctx.barrier = kfence_get(&B->fence); + + queue_async_work(domain, A, GFP_KERNEL); + queue_async_work(domain, B, GFP_KERNEL); + async_work_put(B); + + async_work_wait(A); + async_work_put(A); + + if (READ_ONCE(ctx.result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + __func__, (char)expected, expected, ctx.result); + return -EINVAL; + } + + kfence_put(ctx.barrier); + + return 0; +} + +static void task_signal(void *data, async_cookie_t cookie) +{ + struct long_context *ctx = data; + + 
kfence_complete(ctx->barrier); +} + +static int __init test_manual(struct async_domain *domain) +{ + struct long_context ctx; + struct async_work *A, *B, *C; + const long expected = 'B'; + long out_B = 'A'; + + ctx.result = 0; + ctx.src = &out_B; + ctx.barrier = alloc_kfence(); + + A = async_work_create(task_wait, &ctx, GFP_KERNEL); + if (!A) + return -ENOMEM; + + B = async_work_create(task_B, &out_B, GFP_KERNEL); + if (!B) + return -ENOMEM; + + C = async_work_create(task_signal, &ctx, GFP_KERNEL); + if (!B) + return -ENOMEM; + + async_work_after(C, &B->fence); + + queue_async_work(domain, A, GFP_KERNEL); + queue_async_work(domain, B, GFP_KERNEL); + queue_async_work(domain, C, GFP_KERNEL); + + async_work_wait(A); + + async_work_put(C); + async_work_put(B); + async_work_put(A); + kfence_put(ctx.barrier); + + if (READ_ONCE(ctx.result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + __func__, (char)expected, expected, ctx.result); + return -EINVAL; + } + + return 0; +} + +static int __init test_sync(struct async_domain *domain) +{ + struct async_work *B; + const long expected = 'B'; + long result = 0; + + B = async_work_create(task_B, &result, GFP_KERNEL); + if (!B) + return -ENOMEM; + + queue_async_work(domain, B, GFP_KERNEL); + async_work_put(B); + + async_synchronize_full_domain(domain); + + if (READ_ONCE(result) != expected) { + pr_warn("%s expected %c [%ld], got %ld\n", + __func__, (char)expected, expected, result); + return -EINVAL; + } + + return 0; +} + static int __init test_implicit(struct async_domain *domain) { const long expected = 'B'; @@ -99,24 +375,62 @@ static int __init test_async_domain_init(void)
pr_info("Testing async-domains\n");
- ret = test_implicit(&domain); + ret = test_A(&domain); if (ret) return ret;
+ ret = test_A_fence(&domain); + if (ret) + goto err; + + ret = test_A_fence_B(&domain); + if (ret) + goto err; + + ret = test_B(&domain); + if (ret) + goto err; + + ret = test_B_fence(&domain); + if (ret) + goto err; + + ret = test_B_fence_A(&domain); + if (ret) + goto err; + + ret = test_pause(&domain); + if (ret) + goto err; + + ret = test_manual(&domain); + if (ret) + goto err; + + ret = test_sync(&domain); + if (ret) + goto err; + + ret = test_implicit(&domain); + if (ret) + goto err; + ret = test_registered(&domain); if (ret) - return ret; + goto err;
ret = perf_nop(1, 100); if (ret) - return ret; + goto err;
ret = perf_nop(128, 1000); if (ret) - return ret; + goto err;
+err: + async_synchronize_full_domain(&domain); async_unregister_domain(&domain); - return 0; + return ret; }
static void __exit test_async_domain_cleanup(void)
A frequent mode of operation is fanning out N tasks to execute in parallel, collating the results, fanning out M further tasks, and so on. This pattern is also common to the async/sync kernel domain split. A barrier provides a mechanism by which all work queued after the barrier must wait (i.e. not be scheduled) until all work queued before the barrier has completed.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- include/linux/async.h | 4 +++ kernel/async.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+)
diff --git a/include/linux/async.h b/include/linux/async.h index e7d7289a9889..de44306f8cb7 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -26,6 +26,7 @@ struct async_work {
struct async_domain { struct list_head pending; + struct kfence *barrier; unsigned registered:1; };
@@ -59,6 +60,9 @@ extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain);
+extern void async_barrier(void); +extern void async_barrier_domain(struct async_domain *domain); + extern bool current_is_async(void);
extern struct async_work * diff --git a/kernel/async.c b/kernel/async.c index 0d695919a60d..5cfa398a19b2 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -154,6 +154,15 @@ struct async_work *async_work_create(async_func_t func, void *data, gfp_t gfp) } EXPORT_SYMBOL_GPL(async_work_create);
+static void async_barrier_delete(struct async_domain *domain) +{ + if (!domain->barrier) + return; + + kfence_put(domain->barrier); + domain->barrier = NULL; +} + async_cookie_t queue_async_work(struct async_domain *domain, struct async_work *work, gfp_t gfp) @@ -174,6 +183,10 @@ async_cookie_t queue_async_work(struct async_domain *domain, async_pending_count++; spin_unlock_irqrestore(&async_lock, flags);
+ if (domain->barrier && + !kfence_await_kfence(&entry->base.fence, domain->barrier, gfp)) + async_barrier_delete(domain); + /* mark that this task has queued an async job, used by module init */ current->flags |= PF_USED_ASYNC;
@@ -241,6 +254,63 @@ async_cookie_t async_schedule_domain(async_func_t func, void *data, } EXPORT_SYMBOL_GPL(async_schedule_domain);
+static struct kfence *__async_barrier_create(struct async_domain *domain) +{ + struct kfence *fence; + struct async_entry *entry; + unsigned long flags; + int ret; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + goto out_sync; + + kfence_init(fence, NULL); + + ret = 0; + spin_lock_irqsave(&async_lock, flags); + list_for_each_entry(entry, &domain->pending, pending_link[0]) { + ret |= kfence_await_kfence(fence, + &entry->base.fence, + GFP_ATOMIC); + if (ret < 0) + break; + } + spin_unlock_irqrestore(&async_lock, flags); + if (ret <= 0) + goto out_put; + + if (domain->barrier) + kfence_await_kfence(fence, domain->barrier, GFP_KERNEL); + + kfence_complete(fence); + return fence; + +out_put: + kfence_complete(fence); + kfence_put(fence); +out_sync: + async_synchronize_full_domain(domain); + return NULL; +} + +void async_barrier(void) +{ + async_barrier_domain(&async_dfl_domain); +} +EXPORT_SYMBOL_GPL(async_barrier); + +void async_barrier_domain(struct async_domain *domain) +{ + struct kfence *barrier = __async_barrier_create(domain); + + if (domain->barrier) + kfence_put(domain->barrier); + + domain->barrier = barrier; +} +EXPORT_SYMBOL_GPL(async_barrier_domain); + /** * async_synchronize_full - synchronize all asynchronous function calls * @@ -264,6 +334,8 @@ EXPORT_SYMBOL_GPL(async_synchronize_full); void async_unregister_domain(struct async_domain *domain) { WARN_ON(!list_empty(&domain->pending)); + + async_barrier_delete(domain); domain->registered = 0; } EXPORT_SYMBOL_GPL(async_unregister_domain);
A challenge in driver initialisation is the coordination of many small, sometimes independent, sometimes interdependent tasks. We would like to schedule the independent tasks for execution in parallel across as many cores as possible for rapid initialisation, and then schedule all the dependent tasks once they have completed, again running as many of those in parallel as possible.
Resolving the interdependencies by hand is time-consuming and error-prone. Instead, we want to declare what dependencies a particular task has and what that task provides, and let a runtime dependency solver work out which tasks to run, when, and which of them in parallel. To this end, we introduce struct async_dependency_graph, building upon the kfence and async_work from the previous patches, to allow for the runtime computation of the topological task ordering.
The graph is constructed with async_dependency_graph_build(), which takes the task, its dependencies and what it provides, and builds the graph of kfences required for ordering. Additional kfences can be inserted through async_dependency_depends() and async_dependency_provides() for manual control of the execution order, and async_dependency_get() retrieves a kfence for inspection or waiting upon.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Shuah Khan shuahkh@osg.samsung.com Cc: Tejun Heo tj@kernel.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Andrew Morton akpm@linux-foundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Thomas Gleixner tglx@linutronix.de Cc: "Paul E. McKenney" paulmck@linux.vnet.ibm.com Cc: Dan Williams dan.j.williams@intel.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Davidlohr Bueso dave@stgolabs.net Cc: Nikolay Aleksandrov nikolay@cumulusnetworks.com Cc: "David S. Miller" davem@davemloft.net Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Rasmus Villemoes linux@rasmusvillemoes.dk Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org --- include/linux/async.h | 38 +++ kernel/async.c | 250 ++++++++++++++++ lib/Kconfig.debug | 12 + lib/Makefile | 1 + lib/test-async-dependency-graph.c | 316 +++++++++++++++++++++ .../selftests/lib/async-dependency-graph.sh | 10 + 6 files changed, 627 insertions(+) create mode 100644 lib/test-async-dependency-graph.c create mode 100755 tools/testing/selftests/lib/async-dependency-graph.sh
diff --git a/include/linux/async.h b/include/linux/async.h index de44306f8cb7..0a0040d3fc01 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/kfence.h> #include <linux/list.h> +#include <linux/rbtree.h>
typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); @@ -101,4 +102,41 @@ extern async_cookie_t queue_async_work(struct async_domain *domain, gfp_t gfp); extern async_cookie_t schedule_async_work(struct async_work *work);
+/* Build a graph of work based on dependencies generated by keywords. + * The graph must be acyclic. Can be used to both generate a topological + * ordering of tasks, and to execute independent chains of tasks in + * parallel. + */ + +struct async_dependency_graph { + struct rb_root root; + struct list_head list; +}; + +#define ASYNC_DEPENDENCY_GRAPH_INIT(_name) { \ + .root = RB_ROOT, \ + .list = LIST_HEAD_INIT(_name.list), \ +} + +#define ASYNC_DEPENDENCY_GRAPH(_name) \ + struct async_dependency_graph _name = ASYNC_DEPENDENCY_GRAPH_INIT(_name) + +extern int async_dependency_graph_build(struct async_dependency_graph *adg, + async_func_t fn, void *data, + const char *depends, + const char *provides); + +extern int async_dependency_depends(struct async_dependency_graph *adg, + struct kfence *fence, + const char *depends); + +extern int async_dependency_provides(struct async_dependency_graph *adg, + struct kfence *fence, + const char *provides); + +extern struct kfence *async_dependency_get(struct async_dependency_graph *adg, + const char *name); + +extern void async_dependency_graph_execute(struct async_dependency_graph *adg); + #endif diff --git a/kernel/async.c b/kernel/async.c index 5cfa398a19b2..ac12566f2e0b 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -447,3 +447,253 @@ void init_async_domain(struct async_domain *domain, bool registered) domain->registered = registered; } EXPORT_SYMBOL_GPL(init_async_domain); + +struct async_dependency { + struct kfence fence; + struct rb_node node; + struct list_head link; + char name[0]; +}; + +static struct async_dependency * +__lookup_dependency(struct async_dependency_graph *adg, const char *name) +{ + struct rb_node **p, *parent; + struct async_dependency *d; + int len; + + parent = NULL; + p = &adg->root.rb_node; + while (*p) { + int cmp; + + parent = *p; + d = container_of(parent, typeof(*d), node); + + cmp = strcmp(name, d->name); + if (cmp < 0) + p = &parent->rb_left; + else if (cmp > 0) + p = 
&parent->rb_right; + else + return d; + } + + len = strlen(name) + 1; + d = kmalloc(sizeof(*d) + len, GFP_KERNEL); + if (!d) + return ERR_PTR(-ENOMEM); + + kfence_init(&d->fence, NULL); + memcpy(d->name, name, len); + + rb_link_node(&d->node, parent, p); + rb_insert_color(&d->node, &adg->root); + list_add_tail(&d->link, &adg->list); + + return d; +} + +/** + * async_dependency_depends - declare a prerequisite fence for a named stage + * @adg: the async_dependency_graph for tracking the named stages + * @fence: the kfence to add that depends upon the named stage completing + * @depends: the named stage + * + * This function appends @fence into the async_dependency_graph @adg after + * the @depends stage is completed. That is, the @fence is signaled once + * the chain of dependencies up to and including @depends is complete. + * + * Returns: 0 on success, negative error code on failure. + * In particular, note that if CONFIG_KFENCE_CHECK_DAG is enabled, the + * dependency graph will be checked for cycles, and -EINVAL reported + * in such cases. A dependency cycle leads to unexecutable code. + */ +int async_dependency_depends(struct async_dependency_graph *adg, + struct kfence *fence, + const char *depends) +{ + struct async_dependency *d; + + d = __lookup_dependency(adg, depends); + if (IS_ERR(d)) + return PTR_ERR(d); + + return kfence_await_kfence(fence, &d->fence, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(async_dependency_depends); + +/** + * async_dependency_provides - declare a named stage that should follow + * @adg: the async_dependency_graph for tracking the named stages + * @fence: the kfence to add that provides the named stage with a signal + * @provides: the named stage + * + * This function inserts @fence into the async_dependency_graph @adg before + * the @provides stage is signaled. That is, the @fence signals the + * @provides stage once completed (and once all providers have completed, + * work from the @provides commences). 
+ * + * Returns: 0 on success, negative error code on failure. + * In particular, note that if CONFIG_KFENCE_CHECK_DAG is enabled, the + * dependency graph will be checked for cycles, and -EINVAL reported + * in such cases. A dependency cycle leads to unexecutable code. + */ +int async_dependency_provides(struct async_dependency_graph *adg, + struct kfence *fence, + const char *provides) +{ + struct async_dependency *d; + + d = __lookup_dependency(adg, provides); + if (IS_ERR(d)) + return PTR_ERR(d); + + return kfence_await_kfence(&d->fence, fence, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(async_dependency_provides); + +/** + * async_dependency_get - look up the kfence for a named stage + * @adg: the async_dependency_graph for tracking the named stages + * @name: the named stage + * + * This function looks up the kfence associated with the named stage. This + * fence will be signaled once the named stage is ready. For example, + * waiting on that fence will wait until all prior dependencies of that + * named stage have been completed. + * + * Returns: a new reference on the kfence. The caller must release the + * reference with kfence_put() when finished. 
+ */ +struct kfence *async_dependency_get(struct async_dependency_graph *adg, + const char *name) +{ + struct async_dependency *d; + + d = __lookup_dependency(adg, name); + if (IS_ERR(d)) + return ERR_CAST(d); + + return kfence_get(&d->fence); +} +EXPORT_SYMBOL_GPL(async_dependency_get); + +static int __adg_for_each_token(struct async_dependency_graph *adg, + struct kfence *fence, + const char *string, + int (*fn)(struct async_dependency_graph *, + struct kfence *, + const char *)) +{ + char *tmp, *s, *t; + int ret = 0; + + if (!string) + return 0; + + tmp = kstrdup(string, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + for (s = tmp; (t = strsep(&s, ",")); ) { + if (*t == '\0') + continue; + + ret |= fn(adg, fence, t); + if (ret < 0) + break; + } + + kfree(tmp); + return ret; +} + +/** + * async_dependency_graph_build - insert a task into the dependency graph + * @adg: the async_dependency_graph for tracking the named stages + * @fn: the async_func_t to execute + * @data: the data to pass to the @fn + * @depends: a comma-separated list of named stages that must complete + * before the task can execute + * @provides: a comma-separated list of named stages that will be signaled + * when this task completes + * + * This function inserts the task @fn into the async_dependency_graph @adg + * after all the named stages in @depends have completed. Upon completion + * of the task, all the named stages in @provides are signaled (and once all + * their dependent tasks have also finished, the tasks afterwards will + * execute). + * + * If a task has no dependency (@depends is NULL or an empty string), it will + * be scheduled for execution as soon as it is inserted into the graph @adg. + * + * Returns: 0 on success, negative error code on failure. + * In particular, note that if CONFIG_KFENCE_CHECK_DAG is enabled, the + * dependency graph will be checked for cycles, and -EINVAL reported + * in such cases. A dependency cycle leads to unexecutable code. 
+ */ +int +async_dependency_graph_build(struct async_dependency_graph *adg, + async_func_t fn, void *data, + const char *depends, const char *provides) +{ + struct async_work *work; + int ret; + + work = async_work_create(fn, data, GFP_KERNEL); + if (!work) + return -ENOMEM; + + ret = __adg_for_each_token(adg, &work->fence, depends, + async_dependency_depends); + if (ret < 0) + goto err; + + ret = __adg_for_each_token(adg, &work->fence, provides, + async_dependency_provides); + if (ret < 0) + goto err; + + if (!schedule_async_work(work)) { + ret = -ENOMEM; + goto err; + } + + ret = 0; +out: + async_work_put(work); + return ret; + +err: + work->fence.flags = 0; + kfence_complete(&work->fence); + goto out; +} +EXPORT_SYMBOL_GPL(async_dependency_graph_build); + +/** + * async_dependency_graph_execute - execute the dependency graph + * @adg: the async_dependency_graph + * + * This function marks the @adg as ready for execution. As soon as the + * dependencies of a task have been completed (in their entirety), that + * task is executed. Once completed, it signals the tasks that have listed + * its @provides as one of their @depends, and once ready (all @provides are + * complete) those tasks are scheduled for execution. + * + * Tasks are executed in the topological order of their dependencies. If two, + * or more, tasks are not dependent on each other they may be run concurrently. + * + * The graph @adg is freed upon execution. + */ +void async_dependency_graph_execute(struct async_dependency_graph *adg) +{ + struct async_dependency *d, *next; + + list_for_each_entry_safe(d, next, &adg->list, link) { + kfence_complete(&d->fence); + kfence_put(&d->fence); + } +} +EXPORT_SYMBOL_GPL(async_dependency_graph_execute); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4b180aed88b6..ad3c94ec909e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1795,6 +1795,18 @@ config ASYNC_DOMAIN_SELFTEST
Say N if you are unsure.
+config ASYNC_DEPENDENCY_GRAPH_SELFTEST + tristate "Asynchronous dependency graph self tests" + depends on DEBUG_KERNEL + default n + help + This option provides a kernel module that can be used to test + the asynchronous dependency graph. This option is not useful for + distributions or general kernels, but only for kernel developers + working on the async_dependency_graph facility. + + Say N if you are unsure. + config BACKTRACE_SELF_TEST tristate "Self test for the backtrace code" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 5864053cf63e..fd43aaa8846d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,6 +29,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o obj-$(CONFIG_ASYNC_DOMAIN_SELFTEST) += test-async-domain.o +obj-$(CONFIG_ASYNC_DEPENDENCY_GRAPH_SELFTEST) += test-async-dependency-graph.o obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/test-async-dependency-graph.c b/lib/test-async-dependency-graph.c new file mode 100644 index 000000000000..3bf2d91a67e6 --- /dev/null +++ b/lib/test-async-dependency-graph.c @@ -0,0 +1,316 @@ +/* + * Test cases for async-dependency-graph facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/async.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/string.h> + +struct chain { + atomic_t idx; + unsigned long values[0]; +}; + +struct task_write { + struct chain *chain; + unsigned long value; +}; + +static void __init task_write(void *arg, async_cookie_t cookie) +{ + struct task_write *t = arg; + int idx = atomic_inc_return(&t->chain->idx) - 1; + WRITE_ONCE(t->chain->values[idx], t->value); +} + +static void __init task_nop(void *data, async_cookie_t cookie) +{ +} + +static int __init test_ordering(int nchain, int nwide) +{ + ASYNC_DEPENDENCY_GRAPH(adg); + struct chain **chains; + struct task_write *tests, *t; + int c, w, ret; + + /* Test implementation of simple chains within the dependency graphs */ + pr_debug("%s(nchain=%d, nwide=%d)\n", __func__, nchain, nwide); + + chains = kmalloc_array(nwide, sizeof(struct chain *), GFP_KERNEL); + tests = kmalloc_array(nchain, sizeof(struct task_write), GFP_KERNEL); + if (!chains || !tests) + return -ENOMEM; + + t = tests; + for (w = 0; w < nwide; w++) { + char *depends = NULL, *provides; + + chains[w] = kzalloc(sizeof(struct chain) + + nchain*sizeof(unsigned long), + GFP_KERNEL); + + for (c = 0; c < nchain; c++) { + t->chain = chains[w]; + t->value = c; + provides = kasprintf(GFP_KERNEL, "%d.%d", c, w); + 
async_dependency_graph_build(&adg, task_write, t, + depends, provides); + kfree(depends); + depends = provides; + t++; + } + + kfree(depends); + } + async_dependency_graph_execute(&adg); + async_synchronize_full(); + + ret = 0; + kfree(tests); + for (w = 0; w < nwide; w++) { + for (c = 0; c < nchain; c++) { + if (chains[w]->values[c] != c) { + pr_err("%s(%d, %d): Invalid execution order (chain %d, position %d): found %d\n", + __func__, nchain, nwide, + w, c, (int)chains[w]->values[c]); + + ret = -EINVAL; + } + } + kfree(chains[w]); + } + kfree(chains); + + return ret; +} + +static int __init test_barrier(int nwide) +{ + ASYNC_DEPENDENCY_GRAPH(adg); + struct chain **chains; + struct task_write *tests, *t; + int c, w, ret; + + /* Test implementation of barriers within the dependency graphs */ + pr_debug("%s(nwide=%d)\n", __func__, nwide); + + chains = kmalloc_array(nwide, sizeof(struct chain *), GFP_KERNEL); + tests = kmalloc_array(nwide, 2*sizeof(struct task_write), GFP_KERNEL); + if (!chains || !tests) + return -ENOMEM; + + t = tests; + + /* A,B act as a barrier running between the nops */ + for (w = 0; w < nwide; w++) { + char *provides, *depends; + + chains[w] = kzalloc(sizeof(struct chain) + + 2*sizeof(unsigned long), + GFP_KERNEL); + + depends = NULL; + + provides = kasprintf(GFP_KERNEL, "nop1.%d", w); + async_dependency_graph_build(&adg, task_nop, NULL, + depends, provides); + async_dependency_graph_build(&adg, task_nop, NULL, + depends, provides); + + kfree(depends); + depends = provides; + + provides = kasprintf(GFP_KERNEL, "A.%d", w); + t->chain = chains[w]; + t->value = 0; + async_dependency_graph_build(&adg, task_write, t, + depends, provides); + t++; + + kfree(depends); + depends = provides; + + provides = kasprintf(GFP_KERNEL, "nop2.%d", w); + async_dependency_graph_build(&adg, task_nop, NULL, + depends, provides); + kfree(provides); + + provides = kasprintf(GFP_KERNEL, "nop3.%d", w); + async_dependency_graph_build(&adg, task_nop, NULL, + depends, 
provides); + kfree(provides); + + kfree(depends); + depends = kasprintf(GFP_KERNEL, "nop2.%d,nop3.%d", w, w); + t->chain = chains[w]; + t->value = 1; + async_dependency_graph_build(&adg, task_write, t, + depends, NULL); + kfree(depends); + t++; + } + async_dependency_graph_execute(&adg); + async_synchronize_full(); + + ret = 0; + kfree(tests); + for (w = 0; w < nwide; w++) { + for (c = 0; c < 2; c++) { + if (chains[w]->values[c] != c) { + pr_err("%s(%d): Invalid execution order (chain %d, position %d): found %d\n", + __func__, nwide, + w, c, (int)chains[w]->values[c]); + + ret = -EINVAL; + } + } + kfree(chains[w]); + } + kfree(chains); + + return ret; +} + +static int __init test_dag(void) +{ + ASYNC_DEPENDENCY_GRAPH(adg); + + /* Test detection of cycles within the dependency graphs */ + pr_debug("%s\n", __func__); + + if (!config_enabled(CONFIG_KFENCE_CHECK_DAG)) + return 0; + + async_dependency_graph_build(&adg, task_nop, NULL, "__start__", "A"); + if (async_dependency_graph_build(&adg, task_nop, NULL, "A", "A") != -EINVAL) { + pr_err("Failed to detect AA cycle\n"); + return -EINVAL; + } + + async_dependency_graph_build(&adg, task_nop, NULL, "A", "B"); + if (async_dependency_graph_build(&adg, task_nop, NULL, "B", "A") != -EINVAL) { + pr_err("Failed to detect ABA cycle\n"); + return -EINVAL; + } + + async_dependency_graph_build(&adg, task_nop, NULL, "B", "C"); + if (async_dependency_graph_build(&adg, task_nop, NULL, "C", "A") != -EINVAL) { + pr_err("Failed to detect ABCA cycle\n"); + return -EINVAL; + } + + async_dependency_graph_execute(&adg); + async_synchronize_full(); + + return 0; +} + +static int __init perf_nop(int chain, int width, long timeout_us) +{ + ktime_t start; + long count, delay; + + count = 0; + start = ktime_get(); + do { + ASYNC_DEPENDENCY_GRAPH(adg); + ktime_t delta; + int c, w; + + for (w = 0; w < width; w++) { + char *depends = NULL, *provides; + + for (c = 0; c < chain; c++) { + provides = kasprintf(GFP_KERNEL, "%d.%d", c, w); + 
async_dependency_graph_build(&adg, + task_nop, NULL, + depends, provides); + kfree(depends); + depends = provides; + } + + kfree(depends); + } + async_dependency_graph_execute(&adg); + async_synchronize_full(); + delta = ktime_sub(ktime_get(), start); + delay = ktime_to_ns(delta) >> 10; + count += width * chain; + } while (delay < timeout_us); + + pr_info("%ld nop tasks (in chains of %d, %d chains in parallel) completed in %ldus\n", + count, chain, width, delay); + return 0; +} + +static int __init test_async_dependency_graph_init(void) +{ + int ret; + + pr_info("Testing async-dependency-graph\n"); + + ret = test_ordering(1, 1); + if (ret) + return ret; + + ret = test_ordering(2, 1); + if (ret) + return ret; + + ret = test_ordering(1, 2); + if (ret) + return ret; + + ret = test_ordering(2, 2); + if (ret) + return ret; + + ret = test_ordering(26, 26); + if (ret) + return ret; + + ret = test_dag(); + if (ret) + return ret; + + ret = test_barrier(1); + if (ret) + return ret; + + ret = test_barrier(16); + if (ret) + return ret; + + ret = perf_nop(1, 1, 100); + if (ret) + return ret; + + ret = perf_nop(256, 1, 2000); + if (ret) + return ret; + + ret = perf_nop(128, 2, 2000); + if (ret) + return ret; + + ret = perf_nop(16, 16, 2000); + if (ret) + return ret; + + return 0; +} + +static void __exit test_async_dependency_graph_cleanup(void) +{ +} + +module_init(test_async_dependency_graph_init); +module_exit(test_async_dependency_graph_cleanup); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/lib/async-dependency-graph.sh b/tools/testing/selftests/lib/async-dependency-graph.sh new file mode 100755 index 000000000000..ea4bbc76f60f --- /dev/null +++ b/tools/testing/selftests/lib/async-dependency-graph.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs infrastructure tests using test-async-dependency-graph kernel module + +if /sbin/modprobe -q test-async-dependency-graph; then + /sbin/modprobe -q -r test-async-dependency-graph + echo 
"async-dependency-graph: ok" +else + echo "async-dependency-graph: [FAIL]" + exit 1 +fi
dri-devel@lists.freedesktop.org