Often we have the task of comparing two seqnos known to be from the same context, so provide a common __dma_fence_is_later() helper.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 include/linux/dma-fence.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index a5195a7d6f77..ac5987989e9a 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -336,6 +336,19 @@ dma_fence_is_signaled(struct dma_fence *fence)
 }
 
 /**
+ * __dma_fence_is_later - return if f1 is chronologically later than f2
+ * @f1: [in] the first fence's seqno
+ * @f2: [in] the second fence's seqno from the same context
+ *
+ * Returns true if f1 is chronologically later than f2. Both fences must be
+ * from the same context, since a seqno is not common across contexts.
+ */
+static inline bool __dma_fence_is_later(u32 f1, u32 f2)
+{
+	return (int)(f1 - f2) > 0;
+}
+
+/**
  * dma_fence_is_later - return if f1 is chronologically later than f2
  * @f1: [in] the first fence from the same context
  * @f2: [in] the second fence from the same context
@@ -349,7 +362,7 @@ static inline bool dma_fence_is_later(struct dma_fence *f1,
 	if (WARN_ON(f1->context != f2->context))
 		return false;
 
-	return (int)(f1->seqno - f2->seqno) > 0;
+	return __dma_fence_is_later(f1->seqno, f2->seqno);
 }
 
 /**
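For anyone following along, here is a small standalone sketch (userspace, not part of the patch; is_later() is just a stand-in that mirrors __dma_fence_is_later()) of how the signed-difference comparison behaves, including just past the u32 wraparound and for equal seqnos:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Stand-in mirroring __dma_fence_is_later(): later iff the signed delta
 * between the two seqnos is positive. */
static bool is_later(uint32_t f1, uint32_t f2)
{
	return (int32_t)(f1 - f2) > 0;
}

int main(void)
{
	assert(is_later(2, 1));			/* ordinary ordering */
	assert(!is_later(5, 5));		/* equal seqnos are not "later" */
	assert(is_later(1, 0xffffffff));	/* still correct just past wraparound */
	return 0;
}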
Use the canonical __dma_fence_is_later() to compare the fence seqno against the timeline seqno to check if the fence is signaled.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 69c5ff36e2f9..4d5d8c5e2534 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -219,7 +219,7 @@ static bool timeline_fence_signaled(struct dma_fence *fence)
 {
 	struct sync_timeline *parent = dma_fence_parent(fence);
 
-	return (fence->seqno > parent->value) ? false : true;
+	return !__dma_fence_is_later(fence->seqno, parent->value);
 }
 
 static bool timeline_fence_enable_signaling(struct dma_fence *fence)
The timeline is u32, which limits any single advance to INT_MAX so that we can detect all fences that need signaling.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 4d5d8c5e2534..0e676d08aa70 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -345,6 +345,11 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg)
 	if (copy_from_user(&value, (void __user *)arg, sizeof(value)))
 		return -EFAULT;
 
+	while (value > INT_MAX) {
+		sync_timeline_signal(obj, INT_MAX);
+		value -= INT_MAX;
+	}
+
 	sync_timeline_signal(obj, value);
 
 	return 0;
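To spell out why the chunking is needed (a worked example, not from the patch): the signaled test is effectively (int)(seqno - value) <= 0, so if a single advance moves the timeline more than INT_MAX past a pending fence, that fence now looks as if it were in the future and is never signaled. A userspace sketch of the failure mode a single oversized jump would cause:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t value = 0;		/* current timeline value */
	uint32_t seqno = 0x20000000;	/* a pending fence */

	value += 0xf0000000u;		/* one jump far past the fence */

	/* timeline_fence_signaled() asks: is the fence not later than value? */
	int delta = (int32_t)(seqno - value);
	printf("delta = %d -> %s\n", delta,
	       delta > 0 ? "fence still looks pending (wrong)" : "signaled");
	return 0;
}

Advancing in INT_MAX-sized steps, signalling at each step as the patch does, keeps every pending fence within INT_MAX of the value whenever it is tested.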
If we know the context under which we are called, then we can use the simpler form of spin_lock_irq (saving the save/restore).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 15 +++++++++------
 drivers/dma-buf/sync_debug.c | 14 ++++++--------
 2 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 0e676d08aa70..fc733621987d 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -135,12 +135,11 @@ static void sync_timeline_put(struct sync_timeline *obj) */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) { - unsigned long flags; struct sync_pt *pt, *next;
trace_sync_timeline(obj);
- spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock);
obj->value += inc;
@@ -150,7 +149,7 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) list_del_init(&pt->active_list); }
- spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->child_list_lock); }
/** @@ -167,7 +166,6 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, unsigned int value) { - unsigned long flags; struct sync_pt *pt;
if (size < sizeof(*pt)) @@ -177,13 +175,16 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, if (!pt) return NULL;
- spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock); + sync_timeline_get(obj); dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, obj->context, value); list_add_tail(&pt->child_list, &obj->child_list_head); INIT_LIST_HEAD(&pt->active_list); - spin_unlock_irqrestore(&obj->child_list_lock, flags); + + spin_unlock_irq(&obj->child_list_lock); + return pt; }
@@ -206,9 +207,11 @@ static void timeline_fence_release(struct dma_fence *fence) unsigned long flags;
spin_lock_irqsave(fence->lock, flags); + list_del(&pt->child_list); if (!list_empty(&pt->active_list)) list_del(&pt->active_list); + spin_unlock_irqrestore(fence->lock, flags);
sync_timeline_put(parent); diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 82a6e7f6d37f..0e91632248ba 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -116,17 +116,16 @@ static void sync_print_fence(struct seq_file *s, static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; - unsigned long flags;
seq_printf(s, "%s: %d\n", obj->name, obj->value);
- spin_lock_irqsave(&obj->child_list_lock, flags); + spin_lock_irq(&obj->child_list_lock); list_for_each(pos, &obj->child_list_head) { struct sync_pt *pt = container_of(pos, struct sync_pt, child_list); sync_print_fence(s, &pt->base, false); } - spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->child_list_lock); }
static void sync_print_sync_file(struct seq_file *s, @@ -151,12 +150,11 @@ static void sync_print_sync_file(struct seq_file *s,
static int sync_debugfs_show(struct seq_file *s, void *unused) { - unsigned long flags; struct list_head *pos;
seq_puts(s, "objs:\n--------------\n");
- spin_lock_irqsave(&sync_timeline_list_lock, flags); + spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, @@ -165,11 +163,11 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_obj(s, obj); seq_putc(s, '\n'); } - spin_unlock_irqrestore(&sync_timeline_list_lock, flags); + spin_unlock_irq(&sync_timeline_list_lock);
seq_puts(s, "fences:\n--------------\n");
- spin_lock_irqsave(&sync_file_list_lock, flags); + spin_lock_irq(&sync_file_list_lock); list_for_each(pos, &sync_file_list_head) { struct sync_file *sync_file = container_of(pos, struct sync_file, sync_file_list); @@ -177,7 +175,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_sync_file(s, sync_file); seq_putc(s, '\n'); } - spin_unlock_irqrestore(&sync_file_list_lock, flags); + spin_unlock_irq(&sync_file_list_lock); return 0; }
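As a side note, a minimal sketch (not from the patch; my_lock and both functions are made up for illustration) of the two locking forms being swapped here. spin_lock_irq()/spin_unlock_irq() unconditionally disable and re-enable interrupts, so they are only safe when the caller is known to run with interrupts enabled (ioctl handlers, debugfs show routines, and so on), which is exactly the knowledge this patch exploits:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

/* Caller context unknown (interrupts may already be off): must save/restore. */
static void touch_from_anywhere(void)
{
	unsigned long flags;

	spin_lock_irqsave(&my_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&my_lock, flags);
}

/* Known to be called from process context with interrupts enabled, so the
 * flags dance is unnecessary. */
static void touch_from_process_context(void)
{
	spin_lock_irq(&my_lock);
	/* ... critical section ... */
	spin_unlock_irq(&my_lock);
}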
Since sync_pt is only allocated from a single location and is no longer the base class for fences (that role now belongs to struct dma_fence), it no longer needs a generic unsized allocator.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index fc733621987d..6effa1ce010e 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -155,7 +155,6 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) /** * sync_pt_create() - creates a sync pt * @parent: fence's parent sync_timeline - * @size: size to allocate for this pt * @inc: value of the fence * * Creates a new sync_pt as a child of @parent. @size bytes will be @@ -163,15 +162,12 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) * the generic sync_timeline struct. Returns the sync_pt object or * NULL in case of error. */ -static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, - unsigned int value) +static struct sync_pt *sync_pt_create(struct sync_timeline *obj, + unsigned int value) { struct sync_pt *pt;
- if (size < sizeof(*pt)) - return NULL; - - pt = kzalloc(size, GFP_KERNEL); + pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return NULL;
@@ -312,7 +308,7 @@ static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, goto err; }
- pt = sync_pt_create(obj, sizeof(*pt), data.value); + pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err;
The sync_pts were not adding themselves atomically to the timeline lists, leaving corruption imminent. Only a single list is required to track the unsignaled sync_pts, so reduce it to one and rename the lock more appropriately, along with using idiomatic names to distinguish a list from the links along it.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 39 ++++++++++++++-------------------------
 drivers/dma-buf/sync_debug.c |  9 ++++-----
 drivers/dma-buf/sync_debug.h | 21 ++++++++-------------
 3 files changed, 26 insertions(+), 43 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 6effa1ce010e..e51fe11bbbea 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,9 +96,8 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name));
- INIT_LIST_HEAD(&obj->child_list_head); - INIT_LIST_HEAD(&obj->active_list_head); - spin_lock_init(&obj->child_list_lock); + INIT_LIST_HEAD(&obj->pt_list); + spin_lock_init(&obj->lock);
sync_timeline_debug_add(obj);
@@ -139,17 +138,15 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
trace_sync_timeline(obj);
- spin_lock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock);
obj->value += inc;
- list_for_each_entry_safe(pt, next, &obj->active_list_head, - active_list) { + list_for_each_entry_safe(pt, next, &obj->pt_list, link) if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); - } + list_del_init(&pt->link);
- spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); }
/** @@ -171,15 +168,15 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, if (!pt) return NULL;
- spin_lock_irq(&obj->child_list_lock); - sync_timeline_get(obj); - dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, + dma_fence_init(&pt->base, &timeline_fence_ops, &obj->lock, obj->context, value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); + INIT_LIST_HEAD(&pt->link);
- spin_unlock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock); + if (!dma_fence_is_signaled_locked(&pt->base)) + list_add_tail(&pt->link, &obj->pt_list); + spin_unlock_irq(&obj->lock);
return pt; } @@ -204,9 +201,8 @@ static void timeline_fence_release(struct dma_fence *fence)
spin_lock_irqsave(fence->lock, flags);
- list_del(&pt->child_list); - if (!list_empty(&pt->active_list)) - list_del(&pt->active_list); + if (!list_empty(&pt->link)) + list_del(&pt->link);
spin_unlock_irqrestore(fence->lock, flags);
@@ -223,13 +219,6 @@ static bool timeline_fence_signaled(struct dma_fence *fence)
static bool timeline_fence_enable_signaling(struct dma_fence *fence) { - struct sync_pt *pt = dma_fence_to_sync_pt(fence); - struct sync_timeline *parent = dma_fence_parent(fence); - - if (timeline_fence_signaled(fence)) - return false; - - list_add_tail(&pt->active_list, &parent->active_list_head); return true; }
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 0e91632248ba..2264a075f6a9 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -119,13 +119,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
seq_printf(s, "%s: %d\n", obj->name, obj->value);
- spin_lock_irq(&obj->child_list_lock); - list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); + spin_lock_irq(&obj->lock); + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); }
static void sync_print_sync_file(struct seq_file *s, diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 26fe8b9907b3..899ba0e19fd3 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -24,42 +24,37 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging - * @child_list_head: list of children sync_pts for this sync_timeline - * @child_list_lock: lock protecting @child_list_head and fence.status - * @active_list_head: list of active (unsignaled/errored) sync_pts + * @lock: lock protecting @child_list_head and fence.status + * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ struct sync_timeline { struct kref kref; char name[32];
- /* protected by child_list_lock */ + /* protected by lock */ u64 context; int value;
- struct list_head child_list_head; - spinlock_t child_list_lock; - - struct list_head active_list_head; + struct list_head pt_list; + spinlock_t lock;
struct list_head sync_timeline_list; };
static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct sync_timeline, child_list_lock); + return container_of(fence->lock, struct sync_timeline, lock); }
/** * struct sync_pt - sync_pt object * @base: base fence object - * @child_list: sync timeline child's list - * @active_list: sync timeline active child's list + * @link: link on the sync timeline's list */ struct sync_pt { struct dma_fence base; - struct list_head child_list; - struct list_head active_list; + struct list_head link; };
#ifdef CONFIG_SW_SYNC
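One detail worth calling out in the release path above is the list_empty() check on pt->link: it only works because the entry is initialised with INIT_LIST_HEAD() and unlinked with list_del_init(), both of which leave the node pointing at itself. A small sketch of that idiom (illustrative only, the names are made up and this is not an actual kernel code path):

#include <linux/list.h>
#include <linux/slab.h>

struct pt_example {
	struct list_head link;
};

static LIST_HEAD(example_list);

static void example(void)
{
	struct pt_example *pt = kzalloc(sizeof(*pt), GFP_KERNEL);

	if (!pt)
		return;

	INIT_LIST_HEAD(&pt->link);		 /* off-list: list_empty() is true */
	list_add_tail(&pt->link, &example_list); /* on-list: list_empty() is false */
	list_del_init(&pt->link);		 /* off-list again, still safe to test */

	if (!list_empty(&pt->link))		 /* the release-path pattern above */
		list_del(&pt->link);

	kfree(pt);
}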
On Thu, Jun 29, 2017 at 01:59:29PM +0100, Chris Wilson wrote:
> [snip commit message and earlier hunks]
>
>  static bool timeline_fence_enable_signaling(struct dma_fence *fence)
>  {
> -	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
> -	struct sync_timeline *parent = dma_fence_parent(fence);
> -
> -	if (timeline_fence_signaled(fence))
> -		return false;
> -
> -	list_add_tail(&pt->active_list, &parent->active_list_head);
>  	return true;
Shouldn't you still return false if the fence is already signaled?
>  }
>
> [snip]
>
>  * struct sync_timeline - sync object
>  * @kref: reference count on fence.
>  * @name: name of the sync_timeline. Useful for debugging
> - * @child_list_head: list of children sync_pts for this sync_timeline
> - * @child_list_lock: lock protecting @child_list_head and fence.status
> - * @active_list_head: list of active (unsignaled/errored) sync_pts
> + * @lock: lock protecting @child_list_head and fence.status
s/child_list/pt_list/
Quoting Sean Paul (2017-06-29 18:22:10)
> On Thu, Jun 29, 2017 at 01:59:29PM +0100, Chris Wilson wrote:
> > [snip]
> >
> >  static bool timeline_fence_enable_signaling(struct dma_fence *fence)
> >  {
> > -	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
> > -	struct sync_timeline *parent = dma_fence_parent(fence);
> > -
> > -	if (timeline_fence_signaled(fence))
> > -		return false;
> > -
> > -	list_add_tail(&pt->active_list, &parent->active_list_head);
> >  	return true;
> >  }
> Shouldn't you still return false if the fence is already signaled?
Yes/no :)
In this case, it is immaterial as the only way the timeline can advance is underneath its big lock and by signaling all the fences. So by the time dma_fence calls fence->ops->enable_signaling under that same lock we already know that the fence isn't signaled and can't suddenly be signaled in the middle of the function call. -Chris
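In other words, the resulting callback can be read as below; the comments just restate the argument above and are not part of the posted patch:

static bool timeline_fence_enable_signaling(struct dma_fence *fence)
{
	/*
	 * The dma_fence core invokes this with fence->lock held and only
	 * after checking that the fence is not yet signaled.  Since
	 * fence->lock is the timeline's obj->lock (see dma_fence_init()
	 * in sync_pt_create()) and the timeline can only advance, and so
	 * signal fences, under that same lock, the fence cannot become
	 * signaled while we are here; returning true is sufficient.
	 */
	return true;
}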
The sync_pts were not adding themselves atomically to the timeline lists, leaving corruption imminent. Only a single list is required to track the unsignaled sync_pts, so reduce it to one and rename the lock more appropriately, along with using idiomatic names to distinguish a list from the links along it.
v2: Prevent spinlock recursion on free during create (next patch) and fixup crossref in kerneldoc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 48 ++++++++++++++++++--------------------------
 drivers/dma-buf/sync_debug.c |  9 ++++-----
 drivers/dma-buf/sync_debug.h | 21 ++++++++-----------
 3 files changed, 31 insertions(+), 47 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 6effa1ce010e..f20d18c421a3 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,9 +96,8 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name));
- INIT_LIST_HEAD(&obj->child_list_head); - INIT_LIST_HEAD(&obj->active_list_head); - spin_lock_init(&obj->child_list_lock); + INIT_LIST_HEAD(&obj->pt_list); + spin_lock_init(&obj->lock);
sync_timeline_debug_add(obj);
@@ -139,17 +138,15 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
trace_sync_timeline(obj);
- spin_lock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock);
obj->value += inc;
- list_for_each_entry_safe(pt, next, &obj->active_list_head, - active_list) { + list_for_each_entry_safe(pt, next, &obj->pt_list, link) if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); - } + list_del_init(&pt->link);
- spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); }
/** @@ -171,15 +168,15 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, if (!pt) return NULL;
- spin_lock_irq(&obj->child_list_lock); - sync_timeline_get(obj); - dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, + dma_fence_init(&pt->base, &timeline_fence_ops, &obj->lock, obj->context, value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); + INIT_LIST_HEAD(&pt->link);
- spin_unlock_irq(&obj->child_list_lock); + spin_lock_irq(&obj->lock); + if (!dma_fence_is_signaled_locked(&pt->base)) + list_add_tail(&pt->link, &obj->pt_list); + spin_unlock_irq(&obj->lock);
return pt; } @@ -200,15 +197,15 @@ static void timeline_fence_release(struct dma_fence *fence) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); struct sync_timeline *parent = dma_fence_parent(fence); - unsigned long flags;
- spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + unsigned long flags;
- list_del(&pt->child_list); - if (!list_empty(&pt->active_list)) - list_del(&pt->active_list); - - spin_unlock_irqrestore(fence->lock, flags); + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) + list_del(&pt->link); + spin_unlock_irqrestore(fence->lock, flags); + }
sync_timeline_put(parent); dma_fence_free(fence); @@ -223,13 +220,6 @@ static bool timeline_fence_signaled(struct dma_fence *fence)
static bool timeline_fence_enable_signaling(struct dma_fence *fence) { - struct sync_pt *pt = dma_fence_to_sync_pt(fence); - struct sync_timeline *parent = dma_fence_parent(fence); - - if (timeline_fence_signaled(fence)) - return false; - - list_add_tail(&pt->active_list, &parent->active_list_head); return true; }
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 0e91632248ba..2264a075f6a9 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -119,13 +119,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
seq_printf(s, "%s: %d\n", obj->name, obj->value);
- spin_lock_irq(&obj->child_list_lock); - list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); + spin_lock_irq(&obj->lock); + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irq(&obj->child_list_lock); + spin_unlock_irq(&obj->lock); }
static void sync_print_sync_file(struct seq_file *s, diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 26fe8b9907b3..6a2a8e69a7d0 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -24,42 +24,37 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging - * @child_list_head: list of children sync_pts for this sync_timeline - * @child_list_lock: lock protecting @child_list_head and fence.status - * @active_list_head: list of active (unsignaled/errored) sync_pts + * @lock: lock protecting @pt_list and @value + * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ struct sync_timeline { struct kref kref; char name[32];
- /* protected by child_list_lock */ + /* protected by lock */ u64 context; int value;
- struct list_head child_list_head; - spinlock_t child_list_lock; - - struct list_head active_list_head; + struct list_head pt_list; + spinlock_t lock;
struct list_head sync_timeline_list; };
static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct sync_timeline, child_list_lock); + return container_of(fence->lock, struct sync_timeline, lock); }
/** * struct sync_pt - sync_pt object * @base: base fence object - * @child_list: sync timeline child's list - * @active_list: sync timeline active child's list + * @link: link on the sync timeline's list */ struct sync_pt { struct dma_fence base; - struct list_head child_list; - struct list_head active_list; + struct list_head link; };
#ifdef CONFIG_SW_SYNC
Reduce the list iteration when incrementing the timeline by storing the fences in increasing order.
v2: Prevent spinlock recursion on free during create
v3: Fixup rebase conflict inside comments that escaped the compiler.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 49 ++++++++++++++++++++++++++++++++++++++------
 drivers/dma-buf/sync_debug.h |  5 +++++
 2 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index f20d18c421a3..af1bc84802e5 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,6 +96,7 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name));
+ obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock);
@@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
obj->value += inc;
- list_for_each_entry_safe(pt, next, &obj->pt_list, link) - if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->link); + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!dma_fence_is_signaled_locked(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + }
spin_unlock_irq(&obj->lock); } @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, INIT_LIST_HEAD(&pt->link);
spin_lock_irq(&obj->lock); - if (!dma_fence_is_signaled_locked(&pt->base)) - list_add_tail(&pt->link, &obj->pt_list); + if (!dma_fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (dma_fence_get_rcu(&other->base)) { + dma_fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: spin_unlock_irq(&obj->lock);
return pt; @@ -202,8 +237,10 @@ static void timeline_fence_release(struct dma_fence *fence) unsigned long flags;
spin_lock_irqsave(fence->lock, flags); - if (!list_empty(&pt->link)) + if (!list_empty(&pt->link)) { list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } spin_unlock_irqrestore(fence->lock, flags); }
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 6a2a8e69a7d0..d615a89f774c 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H
#include <linux/list.h> +#include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/dma-fence.h>
@@ -25,6 +26,7 @@ * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging * @lock: lock protecting @pt_list and @value + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ @@ -36,6 +38,7 @@ struct sync_timeline { u64 context; int value;
+ struct rb_root pt_tree; struct list_head pt_list; spinlock_t lock;
@@ -51,10 +54,12 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) * struct sync_pt - sync_pt object * @base: base fence object * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct dma_fence base; struct list_head link; + struct rb_node node; };
#ifdef CONFIG_SW_SYNC
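A toy model (userspace, with a plain sorted array standing in for the rbtree-ordered pt_list, names made up) of the early-exit walk this enables: before the patch every pending fence was visited on each signal, now the walk stops at the first fence that is still in the future.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pending[] = { 3, 7, 9 };	/* kept in seqno order */
	size_t i = 0, n = sizeof(pending) / sizeof(pending[0]);
	uint32_t value = 0;

	value += 5;				/* sync_timeline_signal(obj, 5) */

	while (i < n && (int32_t)(pending[i] - value) <= 0) {
		printf("signal fence %u\n", pending[i]);
		i++;				/* list_del_init() + rb_erase() */
	}

	printf("%zu fence(s) still pending, walk stopped early\n", n - i);
	return 0;
}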
Reduce the list iteration when incrementing the timeline by storing the fences in increasing order.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 49 ++++++++++++++++++++++++++++++++++++++------
 drivers/dma-buf/sync_debug.h |  5 +++++
 2 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index e51fe11bbbea..8cef5d345316 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,6 +96,7 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name));
+ obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock);
@@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
obj->value += inc;
- list_for_each_entry_safe(pt, next, &obj->pt_list, link) - if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->link); + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!dma_fence_is_signaled_locked(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + }
spin_unlock_irq(&obj->lock); } @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, INIT_LIST_HEAD(&pt->link);
spin_lock_irq(&obj->lock); - if (!dma_fence_is_signaled_locked(&pt->base)) - list_add_tail(&pt->link, &obj->pt_list); + if (!dma_fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (dma_fence_get_rcu(&other->base)) { + dma_fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: spin_unlock_irq(&obj->lock);
return pt; @@ -201,8 +236,10 @@ static void timeline_fence_release(struct dma_fence *fence)
spin_lock_irqsave(fence->lock, flags);
- if (!list_empty(&pt->link)) + if (!list_empty(&pt->link)) { list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + }
spin_unlock_irqrestore(fence->lock, flags);
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 899ba0e19fd3..b7a5fab12179 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H
#include <linux/list.h> +#include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/dma-fence.h>
@@ -25,6 +26,7 @@ * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging * @lock: lock protecting @child_list_head and fence.status + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ @@ -36,6 +38,7 @@ struct sync_timeline { u64 context; int value;
+ struct rb_root pt_tree; struct list_head pt_list; spinlock_t lock;
@@ -51,10 +54,12 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) * struct sync_pt - sync_pt object * @base: base fence object * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct dma_fence base; struct list_head link; + struct rb_node node; };
#ifdef CONFIG_SW_SYNC
On Thu, Jun 29, 2017 at 01:59:30PM +0100, Chris Wilson wrote:
> [snip]
>
>  	spin_lock_irq(&obj->lock);
> -	if (!dma_fence_is_signaled_locked(&pt->base))
> -		list_add_tail(&pt->link, &obj->pt_list);
> +	if (!dma_fence_is_signaled_locked(&pt->base)) {
> +		struct rb_node **p = &obj->pt_tree.rb_node;
> +		struct rb_node *parent = NULL;
> +
> +		while (*p) {
> +			struct sync_pt *other;
> +			int cmp;
> +
> +			parent = *p;
> +			other = rb_entry(parent, typeof(*pt), node);
> +			cmp = value - other->base.seqno;
We're imposing an implicit limitation on userspace here that points cannot be
INT_MAX apart (since they'll be in the wrong order in the tree).
I wonder how much this patch will actually save, given that the number of active points on a given timeline should be small. Do we have any evidence that this optimization is warranted?
Sean
Quoting Sean Paul (2017-06-29 19:10:11)
> On Thu, Jun 29, 2017 at 01:59:30PM +0100, Chris Wilson wrote:
> > [snip]
> > +			cmp = value - other->base.seqno;
> We're imposing an implicit limitation on userspace here that points
> cannot be INT_MAX apart (since they'll be in the wrong order in the
> tree).
That's not a new limitation. It's an artifact of using the u32 timeline/seqno.
> I wonder how much this patch will actually save, given that the number
> of active points on a given timeline should be small. Do we have any
> evidence that this optimization is warranted?
The good news is that for empty/small trees, the overhead is tiny, less than the cost of the syscall. I honestly don't know who uses sw_sync and so would benefit. I figured I would throw it out here since it was trivial. -Chris
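To make that concrete (a standalone illustration, not from the thread): two pending points more than INT_MAX apart already confuse the existing seqno math, independently of the rbtree ordering, so the tree does not add a new restriction.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for __dma_fence_is_later() / timeline_fence_signaled(). */
static int is_later(uint32_t f1, uint32_t f2)
{
	return (int32_t)(f1 - f2) > 0;
}

int main(void)
{
	uint32_t value = 0;			/* timeline has not advanced */
	uint32_t a = 1, b = 0x80000002;		/* two points > INT_MAX apart */

	printf("a considered later than b? %d\n", is_later(a, b));	     /* 1: "wrong" order */
	printf("b considered already signaled? %d\n", !is_later(b, value)); /* 1: also "wrong" */
	return 0;
}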
Reduce the list iteration when incrementing the timeline by storing the fences in increasing order.
v2: Prevent spinlock recursion on free during create
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/sw_sync.c    | 49 ++++++++++++++++++++++++++++++++++++++------
 drivers/dma-buf/sync_debug.h |  9 ++++++++
 2 files changed, 52 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index f20d18c421a3..af1bc84802e5 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,6 +96,7 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name));
+ obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock);
@@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
obj->value += inc;
- list_for_each_entry_safe(pt, next, &obj->pt_list, link) - if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->link); + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!dma_fence_is_signaled_locked(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + }
spin_unlock_irq(&obj->lock); } @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, INIT_LIST_HEAD(&pt->link);
spin_lock_irq(&obj->lock); - if (!dma_fence_is_signaled_locked(&pt->base)) - list_add_tail(&pt->link, &obj->pt_list); + if (!dma_fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (dma_fence_get_rcu(&other->base)) { + dma_fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: spin_unlock_irq(&obj->lock);
return pt; @@ -202,8 +237,10 @@ static void timeline_fence_release(struct dma_fence *fence) unsigned long flags;
spin_lock_irqsave(fence->lock, flags); - if (!list_empty(&pt->link)) + if (!list_empty(&pt->link)) { list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } spin_unlock_irqrestore(fence->lock, flags); }
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 6a2a8e69a7d0..363c0a717b77 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H
#include <linux/list.h> +#include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/dma-fence.h>
@@ -24,7 +25,12 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging +<<<<<<< HEAD * @lock: lock protecting @pt_list and @value +======= + * @lock: lock protecting @child_list_head and fence.status + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts +>>>>>>> 0f78df23b9ec... dma-buf/sw-sync: Use an rbtree to sort fences in the timeline * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ @@ -36,6 +42,7 @@ struct sync_timeline { u64 context; int value;
+ struct rb_root pt_tree; struct list_head pt_list; spinlock_t lock;
@@ -51,10 +58,12 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) * struct sync_pt - sync_pt object * @base: base fence object * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct dma_fence base; struct list_head link; + struct rb_node node; };
#ifdef CONFIG_SW_SYNC
On Thu, Jun 29, 2017 at 01:59:24PM +0100, Chris Wilson wrote:
> Often we have the task of comparing two seqnos known to be from the same
> context, so provide a common __dma_fence_is_later() helper.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Sean Paul <seanpaul@chromium.org>
> Cc: Gustavo Padovan <gustavo@padovan.org>
Hi Chris, Thanks for writing the code and walking me through it.
Whole set is Reviewed-by: Sean Paul seanpaul@chromium.org
I'll leave Gustavo or Sumit to apply.
Sean
Hi,
2017-06-29 Sean Paul seanpaul@chromium.org:
> On Thu, Jun 29, 2017 at 01:59:24PM +0100, Chris Wilson wrote:
> > Often we have the task of comparing two seqnos known to be from the same
> > context, so provide a common __dma_fence_is_later() helper.
>
> Whole set is Reviewed-by: Sean Paul <seanpaul@chromium.org>
>
> I'll leave Gustavo or Sumit to apply.
Pushed all to drm-misc-next. Thanks!
Gustavo