We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev; + + /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain; + + if (kref_read(&prev->refcount) > 1) + break; + + prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break; + + /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev);
- dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
On 05/08/2019 10:36, Christian König wrote:
We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com
That definitely makes sense to me, but I'm not an expert in RCU foo :/
Acked-by: Lionel Landwerlin lionel.g.landwerlin@intel.com
I guess this deserves a
Fixes: 7bf60c52e0 ("dma-buf: add new dma_fence_chain container v7")
drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence);
- struct dma_fence *prev;
- /* Manually unlink the chain as much as possible to avoid recursion
* and potential stack overflow.
*/
- while ((prev = rcu_dereference_protected(chain->prev, true))) {
struct dma_fence_chain *prev_chain;
if (kref_read(&prev->refcount) > 1)
break;
prev_chain = to_dma_fence_chain(prev);
if (!prev_chain)
break;
/* No need for atomic operations since we hold the last
* reference to prev_chain.
*/
chain->prev = prev_chain->prev;
RCU_INIT_POINTER(prev_chain->prev, NULL);
dma_fence_put(prev);
- }
- dma_fence_put(prev);
- dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
By any change, did you run into this with a CTS test whose name ends with ".chain" ? :)
-Lionel
On 05/08/2019 10:36, Christian König wrote:
We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence);
- struct dma_fence *prev;
- /* Manually unlink the chain as much as possible to avoid recursion
* and potential stack overflow.
*/
- while ((prev = rcu_dereference_protected(chain->prev, true))) {
struct dma_fence_chain *prev_chain;
if (kref_read(&prev->refcount) > 1)
break;
prev_chain = to_dma_fence_chain(prev);
if (!prev_chain)
break;
/* No need for atomic operations since we hold the last
* reference to prev_chain.
*/
chain->prev = prev_chain->prev;
RCU_INIT_POINTER(prev_chain->prev, NULL);
dma_fence_put(prev);
- }
- dma_fence_put(prev);
- dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
Not even remotely :)I tested this with my own crafted code inside the kernel.
It's probably quite some hassle to actually trigger this problem from userspace and I only found it because I created a very very long sequence chain by accident.
Christian.
Am 05.08.19 um 14:03 schrieb Lionel Landwerlin:
By any change, did you run into this with a CTS test whose name ends with ".chain" ? :)
-Lionel
On 05/08/2019 10:36, Christian König wrote:
We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev;
+ /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain;
+ if (kref_read(&prev->refcount) > 1) + break;
+ prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break;
+ /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev); - dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
That one test creates a 32k chain of fences I think. Anyway my kernel crash was unrelated ;)
-Lionel
On 05/08/2019 16:02, Christian König wrote:
Not even remotely :)I tested this with my own crafted code inside the kernel.
It's probably quite some hassle to actually trigger this problem from userspace and I only found it because I created a very very long sequence chain by accident.
Christian.
Am 05.08.19 um 14:03 schrieb Lionel Landwerlin:
By any change, did you run into this with a CTS test whose name ends with ".chain" ? :)
-Lionel
On 05/08/2019 10:36, Christian König wrote:
We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev;
+ /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain;
+ if (kref_read(&prev->refcount) > 1) + break;
+ prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break;
+ /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev); - dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
On Mon, Aug 05, 2019 at 04:32:20PM +0300, Lionel Landwerlin wrote:
That one test creates a 32k chain of fences I think. Anyway my kernel crash was unrelated ;)
Hm I'd expect that with clever use of vgem fake fences we should be able to repro this bug here with an igt. Would be real nice, any takers? -Daniel
-Lionel
On 05/08/2019 16:02, Christian König wrote:
Not even remotely :)I tested this with my own crafted code inside the kernel.
It's probably quite some hassle to actually trigger this problem from userspace and I only found it because I created a very very long sequence chain by accident.
Christian.
Am 05.08.19 um 14:03 schrieb Lionel Landwerlin:
By any change, did you run into this with a CTS test whose name ends with ".chain" ? :)
-Lionel
On 05/08/2019 10:36, Christian König wrote:
We can't free up the chain using recursion or we run into a stack overflow.
Manually free up the dangling chain nodes to avoid recursion.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-chain.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index b5089f64be2a..44a741677d25 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -178,8 +178,30 @@ static bool dma_fence_chain_signaled(struct dma_fence *fence) static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev;
+ /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain;
+ if (kref_read(&prev->refcount) > 1) + break;
+ prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break;
+ /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev); - dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); }
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org