Further testing of the recent changes with lockdep and other locking checks enabled turned up several bugs in the shrinker code and one sleep-while-atomic bug in panfrost_gem_open(). This series addresses those issues.
Rob
Rob Herring (4):
  drm/shmem: Do dma_unmap_sg before purging pages
  drm/shmem: Use mutex_trylock in drm_gem_shmem_purge
  drm/panfrost: Fix shrinker lockdep issues using drm_gem_shmem_purge()
  drm/panfrost: Fix sleeping while atomic in panfrost_gem_open
 drivers/gpu/drm/drm_gem_shmem_helper.c           | 13 +++++++++++--
 drivers/gpu/drm/panfrost/panfrost_gem.c          | 10 ++++++----
 drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c | 15 ++-------------
 include/drm/drm_gem_shmem_helper.h               |  2 +-
 4 files changed, 20 insertions(+), 20 deletions(-)
Calling dma_unmap_sg() in drm_gem_shmem_free_object() is too late if the backing pages have already been released by the shrinker. The result is the following abort:
 Unable to handle kernel paging request at virtual address ffff8000098ed000
 Mem abort info:
   ESR = 0x96000147
   Exception class = DABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
 Data abort info:
   ISV = 0, ISS = 0x00000147
   CM = 1, WnR = 1
 swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000002f51000
 [ffff8000098ed000] pgd=00000000401f8003, pud=00000000401f7003, pmd=00000000401b1003, pte=00e80000098ed712
 Internal error: Oops: 96000147 [#1] SMP
 Modules linked in: panfrost gpu_sched
 CPU: 5 PID: 902 Comm: gnome-shell Not tainted 5.3.0-rc1+ #95
 Hardware name: 96boards Rock960 (DT)
 pstate: 40000005 (nZcv daif -PAN -UAO)
 pc : __dma_inv_area+0x40/0x58
 lr : arch_sync_dma_for_cpu+0x28/0x30
 sp : ffff00001321ba30
 x29: ffff00001321ba30 x28: ffff00001321bd08 x27: 0000000000000000
 x26: 0000000000000009 x25: 0000ffffc1f86170 x24: 0000000000000000
 x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000021000
 x20: ffff80003bb2d810 x19: 00000000098ed000 x18: 0000000000000000
 x17: 0000000000000000 x16: ffff800023fd9480 x15: 0000000000000000
 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
 x11: 00000000fffb9fff x10: 0000000000000000 x9 : 0000000000000000
 x8 : ffff800023fd9c18 x7 : 0000000000000000 x6 : 00000000ffffffff
 x5 : 0000000000000000 x4 : 0000000000021000
 Purging 5693440 bytes
 x3 : 000000000000003f x2 : 0000000000000040 x1 : ffff80000990e000
 x0 : ffff8000098ed000
 Call trace:
  __dma_inv_area+0x40/0x58
  dma_direct_sync_single_for_cpu+0x7c/0x80
  dma_direct_unmap_page+0x80/0x88
  dma_direct_unmap_sg+0x54/0x80
  drm_gem_shmem_free_object+0xfc/0x108
  panfrost_gem_free_object+0x118/0x128 [panfrost]
  drm_gem_object_free+0x18/0x90
  drm_gem_object_put_unlocked+0x58/0x80
  drm_gem_object_handle_put_unlocked+0x64/0xb0
  drm_gem_object_release_handle+0x70/0x98
  drm_gem_handle_delete+0x64/0xb0
  drm_gem_close_ioctl+0x28/0x38
  drm_ioctl_kernel+0xb8/0x110
  drm_ioctl+0x244/0x3f0
  do_vfs_ioctl+0xbc/0x910
  ksys_ioctl+0x78/0xa8
  __arm64_sys_ioctl+0x1c/0x28
  el0_svc_common.constprop.0+0x88/0x150
  el0_svc_handler+0x28/0x78
  el0_svc+0x8/0xc
 Code: 8a230000 54000060 d50b7e20 14000002 (d5087620)
Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Sean Paul <sean@poorly.run>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index df8f2c8adb2b..5423ec56b535 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -390,6 +390,12 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 
 	WARN_ON(!drm_gem_shmem_is_purgeable(shmem));
 
+	dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
+		     shmem->sgt->nents, DMA_BIDIRECTIONAL);
+	sg_free_table(shmem->sgt);
+	kfree(shmem->sgt);
+	shmem->sgt = NULL;
+
 	drm_gem_shmem_put_pages_locked(shmem);
 
 	shmem->madv = -1;
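For context, the reason clearing shmem->sgt here is enough to avoid the late unmap is that the free path only tears the sg table down when the pointer is still set. A simplified sketch of drm_gem_shmem_free_object() around this kernel version (an approximation for illustration; the exact code may differ):

	/* Sketch only -- simplified from the helper of this era. */
	void drm_gem_shmem_free_object(struct drm_gem_object *obj)
	{
		struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);

		if (shmem->sgt) {
			/* Skipped entirely once purge has set shmem->sgt = NULL */
			dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
				     shmem->sgt->nents, DMA_BIDIRECTIONAL);
			sg_free_table(shmem->sgt);
			kfree(shmem->sgt);
		}
		if (shmem->pages)
			drm_gem_shmem_put_pages(shmem);
		/* ... remaining teardown omitted ... */
	}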
On 19/08/2019 17:12, Rob Herring wrote:
Calling dma_unmap_sg() in drm_gem_shmem_free_object() is too late if the backing pages have already been released by the shrinker. The result is the following abort:
[...]
Looks good to me:
Reviewed-by: Steven Price <steven.price@arm.com>
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
 WARNING: possible circular locking dependency detected
 5.3.0-rc1+ #100 Tainted: G L
 ------------------------------------------------------
 kswapd0/171 is trying to acquire lock:
 000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40

 but task is already holding lock:
 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
 -> #1 (fs_reclaim){+.+.}:
        fs_reclaim_acquire.part.18+0x34/0x40
        fs_reclaim_acquire+0x20/0x28
        __kmalloc_node+0x6c/0x4c0
        kvmalloc_node+0x38/0xa8
        drm_gem_get_pages+0x80/0x1d0
        drm_gem_shmem_get_pages+0x58/0xa0
        drm_gem_shmem_get_pages_sgt+0x48/0xd0
        panfrost_mmu_map+0x38/0xf8 [panfrost]
        panfrost_gem_open+0xc0/0xe8 [panfrost]
        drm_gem_handle_create_tail+0xe8/0x198
        drm_gem_handle_create+0x3c/0x50
        panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
        panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
        drm_ioctl_kernel+0xb8/0x110
        drm_ioctl+0x244/0x3f0
        do_vfs_ioctl+0xbc/0x910
        ksys_ioctl+0x78/0xa8
        __arm64_sys_ioctl+0x1c/0x28
        el0_svc_common.constprop.0+0x90/0x168
        el0_svc_handler+0x28/0x78
        el0_svc+0x8/0xc

 -> #0 (&shmem->pages_lock){+.+.}:
        __lock_acquire+0xa2c/0x1d70
        lock_acquire+0xdc/0x228
        __mutex_lock+0x8c/0x800
        mutex_lock_nested+0x1c/0x28
        drm_gem_shmem_purge+0x20/0x40
        panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
        do_shrink_slab+0x208/0x500
        shrink_slab+0x10c/0x2c0
        shrink_node+0x28c/0x4d8
        balance_pgdat+0x2c8/0x570
        kswapd+0x22c/0x638
        kthread+0x128/0x130
        ret_from_fork+0x10/0x18
other info that might help us debug this:
Possible unsafe locking scenario:
        CPU0                    CPU1
        ----                    ----
   lock(fs_reclaim);
                                lock(&shmem->pages_lock);
                                lock(fs_reclaim);
   lock(&shmem->pages_lock);
*** DEADLOCK ***
 3 locks held by kswapd0/171:
  #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
  #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
  #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]
Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Sean Paul <sean@poorly.run>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
 include/drm/drm_gem_shmem_helper.h     | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 5423ec56b535..f5918707672f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
 
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
+		return false;
 	drm_gem_shmem_purge_locked(obj);
 	mutex_unlock(&shmem->pages_lock);
+
+	return true;
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge);
 
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index ce1600fdfc3e..01f514521687 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -134,7 +134,7 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem)
 }
 
 void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
-void drm_gem_shmem_purge(struct drm_gem_object *obj);
+bool drm_gem_shmem_purge(struct drm_gem_object *obj);
 
 struct drm_gem_shmem_object *
 drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
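For readability, the unlocked helper after this patch is a straightforward trylock wrapper (reconstructed here from the hunk above):

	bool drm_gem_shmem_purge(struct drm_gem_object *obj)
	{
		struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);

		/* Bail out rather than block when called from reclaim. */
		if (!mutex_trylock(&shmem->pages_lock))
			return false;
		drm_gem_shmem_purge_locked(obj);
		mutex_unlock(&shmem->pages_lock);

		return true;
	}
	EXPORT_SYMBOL(drm_gem_shmem_purge);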
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
Did you see my ping about cutting all the locking over to dma_resv? Would align shmem helpers with ttm a lot more, for that bright glorious future taste. Should we capture that in some todo.rst entry?
Cheers, Daniel
On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
Did you see my ping about cutting all the locking over to dma_resv?
Yes, but you didn't reply to Rob C. about it. I guess I'll have to go figure out how reservation objects work...
Would align shmem helpers with ttm a lot more, for that bright glorious future taste. Should we capture that in some todo.rst entry?
Sure.
Rob
On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
Did you see my ping about cutting all the locking over to dma_resv?
Yes, but you didn't reply to Rob C. about it. I guess I'll have to go figure out how reservation objects work...
msm was the last driver that still used struct_mutex. It's a long-term dead-end, and I think with all the effort recently to create helpers for rendering drivers (shmem, vram, ttm refactoring) we should make a solid attempt to get aligned. Or did you mean that Rob Clark had some reply/questions that I didn't respond to because it fell through the cracks?
Would align shmem helpers with ttm a lot more, for that bright glorious future taste. Should we capture that in some todo.rst entry?
Sure.
Cheers, Daniel
On Wed, Aug 21, 2019 at 3:23 AM Daniel Vetter daniel@ffwll.ch wrote:
On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
Did you see my ping about cutting all the locking over to dma_resv?
Yes, but you didn't reply to Rob C. about it. I guess I'll have to go figure out how reservation objects work...
msm was the last driver that still used struct_mutex. It's a long-term dead-end, and I think with all the effort recently to create helpers for rendering drivers (shmem, vram, ttm refactoring) we should make a solid attempt to get aligned. Or did you mean that Rob Clark had some reply/questions that I didn' respond to because it fell through cracks?
I'm not using struct_mutex, so I'm confused as to why you keep mentioning it. The list of BOs for the shrinker is protected with a mutex for the list. That list head, list mutex, and the shrinker instance all have to live at the driver level, so they can't be moved into shmem as you suggested. Agreed?
Then there is the pages_lock within the shmem BO. I assume that is what you are suggesting converting to dma_resv? I'm not really sure what that would look like. You're going to have to spell it out for me. In my brief look at it, it seems like added complexity and it's not clear to me what that buys. Also, I think it would mostly be an internal implementation detail of shmem helpers, though there is one spot in panfrost that takes the lock (2 before this series). So it's kind of orthogonal to this series.
Also, I think getting more drivers using shmem is more beneficial than aligning the implementations of the GEM helpers. We should at least be able to convert vgem and vkms I would think. Various KMS drivers too, but there's an issue around kernel mappings (or lack of). There really should be little reason for most KMS drivers to have a custom BO as CMA or shmem helpers should work.
Rob
On Wed, Aug 21, 2019 at 11:03:55AM -0500, Rob Herring wrote:
On Wed, Aug 21, 2019 at 3:23 AM Daniel Vetter daniel@ffwll.ch wrote:
On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
Did you see my ping about cutting all the locking over to dma_resv?
Yes, but you didn't reply to Rob C. about it. I guess I'll have to go figure out how reservation objects work...
msm was the last driver that still used struct_mutex. It's a long-term dead-end, and I think with all the effort recently to create helpers for rendering drivers (shmem, vram, ttm refactoring) we should make a solid attempt to get aligned. Or did you mean that Rob Clark had some reply/questions that I didn' respond to because it fell through cracks?
I'm not using struct_mutex, so I'm confused as to why you keep mentioning it. The list of BOs for the shrinker is protected with a mutex for the list. That list head, list mutex, and the shrinker instance all have to live at the driver level, so they can't be moved into shmem as you suggested. Agreed?
struct_mutex is just the historical baggage.
Wrt shrinker/lru, why not? We've talked about maybe moving that to make it easier to share ...
Then there is the pages_lock within the shmem BO. I assume that is what you are suggesting converting to dma_resv? I'm not really sure what that would look like. You're going to have to spell it out for me. In my brief look at it, it seems like added complexity and it's not clear to me what that buys. Also, I think it would mostly be an internal implementation detail of shmem helpers, though there is one spot in panfrost that takes the lock (2 before this series). So it's kind of orthogonal to this series.
The issue roughly is that having multiple per-bo locks gets fun, once you add in multiple drivers and dynamic dma-buf sharing. Maybe that's never going to be an issue for drivers using shmem helpers, but who knows. The cross-driver per-bo lock to untangle that maze is dma_resv, and if you then also have your own per-bo locks it can get rather interesting. Best case you end up with two locks nesting, and your own per-bo lock being fully redundant. Worst case you get different nesting depending whether you import or export. So that's roughly the context.
Of course fixing locking is going to be easier the fewer users you have. Once there's lots of code and users of it out there, it's pretty much impossible.
So yeah it would be a 1:1 replacement with all the per-bo locks you have now, and seeing how badly it bites.
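As an illustration only (not something this series does), such a 1:1 swap in the purge helper might look roughly like the sketch below. It assumes the dma_resv_trylock()/dma_resv_unlock() API and the obj->resv reservation object embedded in struct drm_gem_object; a real conversion would also have to cover every other pages_lock/vmap_lock user:

	/* Hypothetical sketch of a pages_lock -> dma_resv conversion. */
	bool drm_gem_shmem_purge(struct drm_gem_object *obj)
	{
		if (!dma_resv_trylock(obj->resv))
			return false;
		drm_gem_shmem_purge_locked(obj);
		dma_resv_unlock(obj->resv);

		return true;
	}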
Also, I think getting more drivers using shmem is more beneficial than aligning the implementations of the GEM helpers. We should at least be able to convert vgem and vkms I would think. Various KMS drivers too, but there's an issue around kernel mappings (or lack of). There really should be little reason for most KMS drivers to have a custom BO as CMA or shmem helpers should work.
Yeah agreed on this, I just want to make sure we're not doing this multiple times ... -Daniel
On 19/08/2019 17:12, Rob Herring wrote:
Lockdep reports a circular locking dependency with pages_lock taken in the shrinker callback. The deadlock can't actually happen with current users at least as a BO will never be purgeable when pages_lock is held. To be safe, let's use mutex_trylock() instead and bail if a BO is locked already.
[...]
Seems reasonable; as you state, I don't think this can actually happen, but keeping lockdep happy is a good idea.
Reviewed-by: Steven Price <steven.price@arm.com>
Steve
This fixes 2 issues found by lockdep. First, drm_gem_shmem_purge() now uses mutex_trylock for the pages_lock to avoid a circular dependency.
Second, it drops the call to panfrost_mmu_unmap() which takes several locks due to runtime PM calls. The call is not necessary because the unmapping is also called in panfrost_gem_close() already.
Fixes: 013b65101315 ("drm/panfrost: Add madvise and shrinker support")
Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
index d191632b6197..cc15005dc68f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -36,17 +36,6 @@ panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc
 	return count;
 }
 
-static void panfrost_gem_purge(struct drm_gem_object *obj)
-{
-	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
-	mutex_lock(&shmem->pages_lock);
-
-	panfrost_mmu_unmap(to_panfrost_bo(obj));
-	drm_gem_shmem_purge_locked(obj);
-
-	mutex_unlock(&shmem->pages_lock);
-}
-
 static unsigned long
 panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -61,8 +50,8 @@ panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	list_for_each_entry_safe(shmem, tmp, &pfdev->shrinker_list, madv_list) {
 		if (freed >= sc->nr_to_scan)
 			break;
-		if (drm_gem_shmem_is_purgeable(shmem)) {
-			panfrost_gem_purge(&shmem->base);
+		if (drm_gem_shmem_is_purgeable(shmem) &&
+		    drm_gem_shmem_purge(&shmem->base)) {
 			freed += shmem->base.size >> PAGE_SHIFT;
 			list_del_init(&shmem->madv_list);
 		}
On 19/08/2019 17:12, Rob Herring wrote:
This fixes 2 issues found by lockdep. First, drm_gem_shmem_purge() now uses mutex_trylock for the pages_lock to avoid a circular dependency.
NIT: This is in the previous patch.
Second, it drops the call to panfrost_mmu_unmap() which takes several locks due to runtime PM calls. The call is not necessary because the unmapping is also called in panfrost_gem_close() already.
I could be completely mistaken here, but don't we need to unmap the memory from the GPU here because the backing is being freed? The panfrost_gem_close() call could come significantly later, by which time a malicious user space could have run some jobs on the GPU to take a look at what those mappings now point to (quite likely some other process's memory).
So this looks to me like a crafty way of observing 'random' memory in the system.
Steve
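One conceivable way to address this concern while keeping the trylock behaviour would be a driver-side purge wrapper that takes pages_lock itself and unmaps before purging, roughly as sketched below. This is only a sketch, not what the patch does, and it still runs into the runtime-PM locking problem that motivated dropping the unmap in the first place:

	/* Sketch only: keep the GPU unmap on the purge path. */
	static bool panfrost_gem_purge(struct drm_gem_object *obj)
	{
		struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);

		if (!mutex_trylock(&shmem->pages_lock))
			return false;

		/* Assumes panfrost_mmu_unmap() is safe in reclaim context. */
		panfrost_mmu_unmap(to_panfrost_bo(obj));
		drm_gem_shmem_purge_locked(obj);

		mutex_unlock(&shmem->pages_lock);
		return true;
	}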
We can't hold the mm_lock spinlock as panfrost_mmu_map() can sleep:
 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:909
 in_atomic(): 1, irqs_disabled(): 0, pid: 974, name: glmark2-es2-drm
 1 lock held by glmark2-es2-drm/974:
 CPU: 5 PID: 974 Comm: glmark2-es2-drm Tainted: G W L 5.3.0-rc1+ #94
 Hardware name: 96boards Rock960 (DT)
 Call trace:
  dump_backtrace+0x0/0x130
  show_stack+0x14/0x20
  dump_stack+0xc4/0x10c
  ___might_sleep+0x158/0x228
  __might_sleep+0x50/0x88
  __mutex_lock+0x58/0x800
  mutex_lock_interruptible_nested+0x1c/0x28
  drm_gem_shmem_get_pages+0x24/0xa0
  drm_gem_shmem_get_pages_sgt+0x48/0xd0
  panfrost_mmu_map+0x38/0xf8 [panfrost]
  panfrost_gem_open+0xc0/0xd8 [panfrost]
  drm_gem_handle_create_tail+0xe8/0x198
  drm_gem_handle_create+0x3c/0x50
  panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
  panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
  drm_ioctl_kernel+0xb8/0x110
  drm_ioctl+0x244/0x3f0
  do_vfs_ioctl+0xbc/0x910
  ksys_ioctl+0x78/0xa8
  __arm64_sys_ioctl+0x1c/0x28
  el0_svc_common.constprop.0+0x90/0x168
  el0_svc_handler+0x28/0x78
  el0_svc+0x8/0xc
Fixes: 68337d0b8644 ("drm/panfrost: Restructure the GEM object creation")
Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_gem.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index e084bc4e9083..acb07fe06580 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -65,16 +65,18 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
 	spin_lock(&priv->mm_lock);
 	ret = drm_mm_insert_node_generic(&priv->mm, &bo->node,
 					 size >> PAGE_SHIFT, align, color, 0);
+	spin_unlock(&priv->mm_lock);
 	if (ret)
-		goto out;
+		return ret;
 
 	if (!bo->is_heap) {
 		ret = panfrost_mmu_map(bo);
-		if (ret)
+		if (ret) {
+			spin_lock(&priv->mm_lock);
 			drm_mm_remove_node(&bo->node);
+			spin_unlock(&priv->mm_lock);
+		}
 	}
-out:
-	spin_unlock(&priv->mm_lock);
 	return ret;
 }
On 19/08/2019 17:12, Rob Herring wrote:
We can't hold the mm_lock spinlock as panfrost_mmu_map() can sleep:
[...]
Reviewed-by: Steven Price <steven.price@arm.com>