Rearrange the code to pull the operations before the fence->lock critical section, and remove a small amount of redundancy:
Function                                     old     new   delta
dma_fence_add_callback                       156     145     -11
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 656e9ac2d028..8d5bdfce638e 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -348,29 +348,25 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 			   dma_fence_func_t func)
 {
-	unsigned long flags;
-	int ret = 0;
+	int ret = -ENOENT;
 
 	if (WARN_ON(!fence || !func))
 		return -EINVAL;
 
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		INIT_LIST_HEAD(&cb->node);
-		return -ENOENT;
-	}
+	cb->func = func;
+	INIT_LIST_HEAD(&cb->node);
 
-	spin_lock_irqsave(fence->lock, flags);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		unsigned long flags;
 
-	if (__dma_fence_enable_signaling(fence)) {
-		cb->func = func;
-		list_add_tail(&cb->node, &fence->cb_list);
-	} else {
-		INIT_LIST_HEAD(&cb->node);
-		ret = -ENOENT;
+		spin_lock_irqsave(fence->lock, flags);
+		if (__dma_fence_enable_signaling(fence)) {
+			list_add_tail(&cb->node, &fence->cb_list);
+			ret = 0;
+		}
+		spin_unlock_irqrestore(fence->lock, flags);
 	}
 
-	spin_unlock_irqrestore(fence->lock, flags);
-
 	return ret;
 }
 EXPORT_SYMBOL(dma_fence_add_callback);
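Reassembling the hunk, dma_fence_add_callback() afterwards reads as a whole:

int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
			   dma_fence_func_t func)
{
	int ret = -ENOENT;

	if (WARN_ON(!fence || !func))
		return -EINVAL;

	/* Prepare the callback before taking fence->lock. */
	cb->func = func;
	INIT_LIST_HEAD(&cb->node);

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
		unsigned long flags;

		spin_lock_irqsave(fence->lock, flags);
		if (__dma_fence_enable_signaling(fence)) {
			list_add_tail(&cb->node, &fence->cb_list);
			ret = 0;
		}
		spin_unlock_irqrestore(fence->lock, flags);
	}

	return ret;
}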
When waiting with a callback on the stack, we must remove the callback upon wait completion. Since this will be notified by the fence signal callback, the removal often contends with the fence->lock being held by the signaler. We can look at the list entry to see if the callback was already signaled before we take the contended lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8d5bdfce638e..b910d7bc0854 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -420,6 +420,9 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
+	if (list_empty(&cb->node))
+		return false;
+
 	spin_lock_irqsave(fence->lock, flags);
 
 	ret = !list_empty(&cb->node);
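For context, the whole of dma_fence_remove_callback() then looks roughly like this (the tail below the new early-out is the existing upstream code, quoted from memory):

bool
dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	unsigned long flags;
	bool ret;

	/*
	 * Unlocked peek: if the signaler already emptied the node,
	 * skip the contended fence->lock entirely.
	 */
	if (list_empty(&cb->node))
		return false;

	spin_lock_irqsave(fence->lock, flags);

	ret = !list_empty(&cb->node);
	if (ret)
		list_del_init(&cb->node);

	spin_unlock_irqrestore(fence->lock, flags);

	return ret;
}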
On Wed, Jul 15, 2020 at 11:49:05AM +0100, Chris Wilson wrote:
When waiting with a callback on the stack, we must remove the callback upon wait completion. Since this will be notified by the fence signal callback, the removal often contends with the fence->lock being held by the signaler. We can look at the list entry to see if the callback was already signaled before we take the contended lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8d5bdfce638e..b910d7bc0854 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -420,6 +420,9 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
+	if (list_empty(&cb->node))
I was about to say "but the races" but then noticed that Paul fixed list_empty to use READ_ONCE like 5 years ago :-)
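That is, since that fix the helper reads:

static inline int list_empty(const struct list_head *head)
{
	/* unordered but tear-free load of the head pointer */
	return READ_ONCE(head->next) == head;
}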
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+		return false;
+
 	spin_lock_irqsave(fence->lock, flags);
 
 	ret = !list_empty(&cb->node);
-- 
2.20.1
Quoting Daniel Vetter (2020-07-15 13:10:22)
On Wed, Jul 15, 2020 at 11:49:05AM +0100, Chris Wilson wrote:
When waiting with a callback on the stack, we must remove the callback upon wait completion. Since this will be notified by the fence signal callback, the removal often contends with the fence->lock being held by the signaler. We can look at the list entry to see if the callback was already signaled before we take the contended lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8d5bdfce638e..b910d7bc0854 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -420,6 +420,9 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
+	if (list_empty(&cb->node))
I was about to say "but the races" but then noticed that Paul fixed list_empty to use READ_ONCE like 5 years ago :-)
I'm always going "when exactly do we need list_empty_careful()"?
We can rule out a concurrent dma_fence_add_callback() for the same dma_fence_cb, as that is a lost cause. So we only have to worry about the concurrent list_del_init() from dma_fence_signal_locked(). So it's the timing of list_del_init(): WRITE_ONCE(list->next, list) vs READ_ONCE(list->next) == list and we don't need to care about the trailing instructions in list_del_init()...
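For reference, the helpers under discussion, roughly as they read in include/linux/list.h at the time:

static inline void INIT_LIST_HEAD(struct list_head *list)
{
	WRITE_ONCE(list->next, list);
	list->prev = list;		/* the trailing instruction */
}

static inline void list_del_init(struct list_head *entry)
{
	__list_del_entry(entry);	/* unlink from the list */
	INIT_LIST_HEAD(entry);		/* then point the node at itself */
}

static inline int list_empty_careful(const struct list_head *head)
{
	struct list_head *next = head->next;

	/* only "stable empty" if both pointers already point back home */
	return (next == head) && (next == head->prev);
}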
Wait that trailing instruction is actually important here if the dma_fence_cb is on the stack, or other imminent free.
Ok, this does need to be list_empty_careful! -Chris
Quoting Chris Wilson (2020-07-15 13:21:43)
Quoting Daniel Vetter (2020-07-15 13:10:22)
On Wed, Jul 15, 2020 at 11:49:05AM +0100, Chris Wilson wrote:
When waiting with a callback on the stack, we must remove the callback upon wait completion. Since this will be notified by the fence signal callback, the removal often contends with the fence->lock being held by the signaler. We can look at the list entry to see if the callback was already signaled before we take the contended lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8d5bdfce638e..b910d7bc0854 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -420,6 +420,9 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
+	if (list_empty(&cb->node))
I was about to say "but the races" but then noticed that Paul fixed list_empty to use READ_ONCE like 5 years ago :-)
I'm always going "when exactly do we need list_empty_careful()"?
We can rule out a concurrent dma_fence_add_callback() for the same dma_fence_cb, as that is a lost cause. So we only have to worry about the concurrent list_del_init() from dma_fence_signal_locked(). So it's the timing of list_del_init(): WRITE_ONCE(list->next, list) vs READ_ONCE(list->next) == list and we don't need to care about the trailing instructions in list_del_init()...
Wait that trailing instruction is actually important here if the dma_fence_cb is on the stack, or other imminent free.
Ok, this does need to be list_empty_careful!
There's a further problem in that we call INIT_LIST_HEAD on the dma_fence_cb before the signal callback. So even if list_empty_careful() confirms the dma_fence_cb to be completely decoupled, the containing struct may still be in use. -Chris
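The signaling side, sketching the relevant part of dma_fence_signal_locked() from memory:

int dma_fence_signal_locked(struct dma_fence *fence)
{
	struct dma_fence_cb *cur, *tmp;
	struct list_head cb_list;

	if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &fence->flags)))
		return -EINVAL;

	/* Detach all callbacks under the lock... */
	list_replace(&fence->cb_list, &cb_list);

	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
		INIT_LIST_HEAD(&cur->node);	/* decoupled first... */
		cur->func(fence, cur);		/* ...then notified */
	}

	return 0;
}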
On Wed, Jul 15, 2020 at 2:40 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
Quoting Chris Wilson (2020-07-15 13:21:43)
Quoting Daniel Vetter (2020-07-15 13:10:22)
On Wed, Jul 15, 2020 at 11:49:05AM +0100, Chris Wilson wrote:
When waiting with a callback on the stack, we must remove the callback upon wait completion. Since this will be notified by the fence signal callback, the removal often contends with the fence->lock being held by the signaler. We can look at the list entry to see if the callback was already signaled before we take the contended lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8d5bdfce638e..b910d7bc0854 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -420,6 +420,9 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
+	if (list_empty(&cb->node))
I was about to say "but the races" but then noticed that Paul fixed list_empty to use READ_ONCE like 5 years ago :-)
I'm always going "when exactly do we need list_empty_careful()"?
We can rule out a concurrent dma_fence_add_callback() for the same dma_fence_cb, as that is a lost cause. So we only have to worry about the concurrent list_del_init() from dma_fence_signal_locked(). So it's the timing of list_del_init(): WRITE_ONCE(list->next, list) vs READ_ONCE(list->next) == list and we don't need to care about the trailing instructions in list_del_init()...
Wait that trailing instruction is actually important here if the dma_fence_cb is on the stack, or other imminent free.
Ok, this does need to be list_empty_careful!
Hm, tbh I'm not really clear on what list_empty_careful does on top. It seems to lack READ_ONCE, so either some really big trickery with dependencies is going on, or I don't see how this works without locks.
I've now stared at list_empty_careful and a bunch of users quite a bit, and I have no idea when you'd want to use it. Lockless you want a READ_ONCE I think and a simple check, so list_empty. And just accept that any time you race you'll go into the locked slowpath for "list isn't empty". Also this only works if the list_empty case is the "nothing to do, job already done" case, since the other one just isn't guaranteed to be accurate.
list_empty_careful just wraps a bunch more magic around that will make this both worse, and more racy (if the compiler feels creative) because no READ_ONCE or anything like that. I don't get what that thing is for ...
There's a further problem in that we call INIT_LIST_HEAD on the dma_fence_cb before the signal callback. So even if list_empty_careful() confirms the dma_fence_cb to be completely decoupled, the containing struct may still be in use.
The kerneldoc of dma_fence_remove_callback() already has a very stern warning that this will blow up if you don't hold a full reference or otherwise control the lifetime of this stuff. So I don't think we have to worry about any of that. Or do you mean something else? -Daniel
Quoting Daniel Vetter (2020-07-15 15:03:34)
On Wed, Jul 15, 2020 at 2:40 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
There's a further problem in that we call INIT_LIST_HEAD on the dma_fence_cb before the signal callback. So even if list_empty_careful() confirms the dma_fence_cb to be completely decoupled, the containing struct may still be in use.
The kerneldoc of dma_fence_remove_callback() already has a very stern warning that this will blow up if you don't hold a full reference or otherwise control the lifetime of this stuff. So I don't think we have to worry about any of that. Or do you mean something else?
It's the struct dma_fence_cb itself that may be freed/reused. Consider dma_fence_default_wait(). That uses struct default_wait_cb on the stack, so in order to ensure that the callback is completed the list_empty check has to remain under the spinlock, or else dma_fence_default_wait_cb() can still be dereferencing wait->task as the function returns.
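where the callback and its container are:

struct default_wait_cb {
	struct dma_fence_cb base;
	struct task_struct *task;
};

static void
dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct default_wait_cb *wait =
		container_of(cb, struct default_wait_cb, base);

	/* dereferences the on-stack container of the waiter */
	wake_up_process(wait->task);
}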
So currently it is:
signed long
dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
{
	struct default_wait_cb cb;
	unsigned long flags;
	signed long ret = timeout ? timeout : 1;

	spin_lock_irqsave(fence->lock, flags);

	if (intr && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto out;
	}

	if (!__dma_fence_enable_signaling(fence))
		goto out;

	if (!timeout) {
		ret = 0;
		goto out;
	}

	cb.base.func = dma_fence_default_wait_cb;
	cb.task = current;
	list_add(&cb.base.node, &fence->cb_list);

	while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
		if (intr)
			__set_current_state(TASK_INTERRUPTIBLE);
		else
			__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(fence->lock, flags);

		ret = schedule_timeout(ret);

		spin_lock_irqsave(fence->lock, flags);
		if (ret > 0 && intr && signal_pending(current))
			ret = -ERESTARTSYS;
	}

	if (!list_empty(&cb.base.node))
		list_del(&cb.base.node);
	__set_current_state(TASK_RUNNING);

out:
	spin_unlock_irqrestore(fence->lock, flags);
	return ret;
}
but it could be written as:
signed long
dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
{
	struct default_wait_cb cb;
	int state = intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;

	cb.task = current;
	if (dma_fence_add_callback(fence, &cb.base, dma_fence_default_wait_cb))
		return timeout ? timeout : 1;

	for (;;) {
		set_current_state(state);

		if (dma_fence_is_signaled(fence)) {
			timeout = timeout ? timeout : 1;
			break;
		}

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout)
			break;

		timeout = schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	dma_fence_remove_callback(fence, &cb.base);

	return timeout;
}
-Chris
On Wed, Jul 15, 2020 at 4:37 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
Quoting Daniel Vetter (2020-07-15 15:03:34)
On Wed, Jul 15, 2020 at 2:40 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
There's a further problem in that we call INIT_LIST_HEAD on the dma_fence_cb before the signal callback. So even if list_empty_careful() confirms the dma_fence_cb to be completely decoupled, the containing struct may still be in use.
The kerneldoc of dma_fence_remove_callback() already has a very stern warning that this will blow up if you don't hold a full reference or otherwise control the lifetime of this stuff. So I don't think we have to worry about any of that. Or do you mean something else?
It's the struct dma_fence_cb itself that may be freed/reused. Consider dma_fence_default_wait(). That uses struct default_wait_cb on the stack, so in order to ensure that the callback is completed the list_empty check has to remain under the spinlock, or else dma_fence_default_wait_cb() can still be dereferencing wait->task as the function returns.
The current implementation of remove_callback doesn't work if you don't own the callback structure. Or control its lifetime through some other means.
So if we have callers removing other callback structures, that just doesn't work, you can only remove your own.
From a quick spot check across a few callers we don't seem to have a problem here; all current callers for this function are in various wait functions (driver specific, or multi fence waits, stuff like that). -Daniel
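The supported pattern, sketched (my_wait_cb and example_wait are made up for illustration):

static void my_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	/* hypothetical notification, e.g. wake up a waiter */
}

static void example_wait(struct dma_fence *fence)
{
	struct dma_fence_cb cb;	/* we own this; it lives on our stack */

	if (dma_fence_add_callback(fence, &cb, my_wait_cb))
		return;	/* fence already signaled, cb was never added */

	/* ... block until my_wait_cb fires or we give up ... */

	/* Legal only because cb is ours and still alive here. */
	dma_fence_remove_callback(fence, &cb);
}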