Thomas has noticed that the lockdep was broken in vmwgfx. It was broken during the pci initialization rework. This fixes the breakage by making sure we initialize the locking code before doing anything else. This was independently spotted and fixed by Tetsuo Handa as well.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Cc: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com Fixes: 8772c0bb58bbf98a ("drm/vmwgfx: Cleanup pci resource allocation") --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 17 ++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index dd69b51c40e4..6fa24645fbbf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -712,17 +712,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->last_read_seqno = (uint32_t) -100; dev_priv->drm.dev_private = dev_priv;
- ret = vmw_setup_pci_resources(dev_priv, pci_id); - if (ret) - return ret; - ret = vmw_detect_version(dev_priv); - if (ret) - goto out_no_pci_or_version; - mutex_init(&dev_priv->cmdbuf_mutex); - mutex_init(&dev_priv->release_mutex); mutex_init(&dev_priv->binding_mutex); - mutex_init(&dev_priv->global_kms_state_mutex); ttm_lock_init(&dev_priv->reservation_sem); spin_lock_init(&dev_priv->resource_lock); spin_lock_init(&dev_priv->hw_lock); @@ -730,6 +721,14 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) spin_lock_init(&dev_priv->cap_lock); spin_lock_init(&dev_priv->cursor_lock);
+ ret = vmw_setup_pci_resources(dev_priv, pci_id); + if (ret) + return ret; + ret = vmw_detect_version(dev_priv); + if (ret) + goto out_no_pci_or_version; + + for (i = vmw_res_context; i < vmw_res_max; ++i) { idr_init(&dev_priv->res_idr[i]); INIT_LIST_HEAD(&dev_priv->res_lru[i]); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 336f5086ca26..8087a9013455 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -529,7 +529,6 @@ struct vmw_private { struct vmw_overlay *overlay_priv; struct drm_property *hotplug_mode_update_property; struct drm_property *implicit_placement_property; - struct mutex global_kms_state_mutex; spinlock_t cursor_lock; struct drm_atomic_state *suspend_state;
@@ -592,7 +591,6 @@ struct vmw_private { bool refuse_hibernation; bool suspend_locked;
- struct mutex release_mutex; atomic_t num_fifo_resources;
/*
Quite often it's a little hard to tell if reservations are already held in code paths that unpin bo's. While our pinning/unpinning code should be more explicit that requires a substential amount of work so instead we can avoid the issues by making sure we try to reserve before unpinning. Because we unpin those bo's only on destruction/error paths just that check tells us if we're already reserved or not and allows to cleanly unpin.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..03bef9c17e56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) return srf; }
+/* + * vmw_bo_unpin_safe - currently pinning requires a reservation to be held + * but sometimes it's hard to tell if we're in a callback whose parent + * is already holding a reservation, to avoid deadloacks we have to try + * to get a reservation explicitly to also try to avoid messing up the + * internal ttm lru bo list + */ +static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo) +{ + bool locked = dma_resv_trylock(bo->base.resv); + ttm_bo_unpin(bo); + if (locked) + dma_resv_unlock(bo->base.resv); +} + static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) { struct vmw_buffer_object *tmp_buf = *buf; @@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { if (tmp_buf->base.pin_count > 0) - ttm_bo_unpin(&tmp_buf->base); + vmw_bo_unpin_safe(&tmp_buf->base); ttm_bo_put(&tmp_buf->base); } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index a0b53141dded..23ffeb2dd6e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -277,7 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, &batch->otables[i]); }
- ttm_bo_unpin(batch->otable_bo); + vmw_bo_unpin_safe(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; return ret; @@ -343,7 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, vmw_bo_fence_single(bo, NULL); ttm_bo_unreserve(bo);
- ttm_bo_unpin(batch->otable_bo); + vmw_bo_unpin_safe(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; } @@ -530,7 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, void vmw_mob_destroy(struct vmw_mob *mob) { if (mob->pt_bo) { - ttm_bo_unpin(mob->pt_bo); + vmw_bo_unpin_safe(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; } @@ -646,7 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); if (pt_set_up) { - ttm_bo_unpin(mob->pt_bo); + vmw_bo_unpin_safe(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; }
Hi, Zack,
On 4/8/21 7:22 PM, Zack Rusin wrote:
Quite often it's a little hard to tell if reservations are already held in code paths that unpin bo's. While our pinning/unpinning code should be more explicit that requires a substential amount of work so instead we can avoid the issues by making sure we try to reserve before unpinning. Because we unpin those bo's only on destruction/error paths just that check tells us if we're already reserved or not and allows to cleanly unpin.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..03bef9c17e56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) return srf; }
+/*
- vmw_bo_unpin_safe - currently pinning requires a reservation to be held
- but sometimes it's hard to tell if we're in a callback whose parent
- is already holding a reservation, to avoid deadloacks we have to try
- to get a reservation explicitly to also try to avoid messing up the
- internal ttm lru bo list
- */
+static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo) +{
- bool locked = dma_resv_trylock(bo->base.resv);
Isn't there a chance another thread is holding the lock and releasing it at this position?
- ttm_bo_unpin(bo);
- if (locked)
dma_resv_unlock(bo->base.resv);
+}
- static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) { struct vmw_buffer_object *tmp_buf = *buf;
@@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { if (tmp_buf->base.pin_count > 0)
ttm_bo_unpin(&tmp_buf->base);
vmw_bo_unpin_safe(&tmp_buf->base);
Hmm. If execbuf is referencing a buffer that someone else has pinned, wouldn't execbuf incorrectly unpin that buffer when calling unreference? Would it perhaps be possible to if needed, use the TTM release_notify callback to unpin any leaking pins similar to what's done in ttm_bo_release? Although that I guess goes somewhat against that recently added WARN_ON_ONCE.
ttm_bo_put(&tmp_buf->base);
} } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index a0b53141dded..23ffeb2dd6e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -277,7 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, &batch->otables[i]); }
- ttm_bo_unpin(batch->otable_bo);
- vmw_bo_unpin_safe(batch->otable_bo);
Could it be we're the only user here? If so safe to reserve and unpin.
ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; return ret; @@ -343,7 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, vmw_bo_fence_single(bo, NULL); ttm_bo_unreserve(bo);
- ttm_bo_unpin(batch->otable_bo);
- vmw_bo_unpin_safe(batch->otable_bo);
Would it be possible to just move ttm_bo_unpin() above the ttm_bo_unreserve() above?
ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; } @@ -530,7 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, void vmw_mob_destroy(struct vmw_mob *mob) { if (mob->pt_bo) {
ttm_bo_unpin(mob->pt_bo);
ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; }vmw_bo_unpin_safe(mob->pt_bo);
@@ -646,7 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); if (pt_set_up) {
ttm_bo_unpin(mob->pt_bo);
vmw_bo_unpin_safe(mob->pt_bo);
Perhaps the same here?
ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL;
}
/Thomas
On 4/9/21 3:38 AM, Thomas Hellström (Intel) wrote:
Hi, Zack,
On 4/8/21 7:22 PM, Zack Rusin wrote:
Quite often it's a little hard to tell if reservations are already held in code paths that unpin bo's. While our pinning/unpinning code should be more explicit that requires a substential amount of work so instead we can avoid the issues by making sure we try to reserve before unpinning. Because we unpin those bo's only on destruction/error paths just that check tells us if we're already reserved or not and allows to cleanly unpin.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..03bef9c17e56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) return srf; } +/*
- vmw_bo_unpin_safe - currently pinning requires a reservation to be
held
- but sometimes it's hard to tell if we're in a callback whose parent
- is already holding a reservation, to avoid deadloacks we have to try
- to get a reservation explicitly to also try to avoid messing up the
- internal ttm lru bo list
- */
+static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo) +{ + bool locked = dma_resv_trylock(bo->base.resv);
Isn't there a chance another thread is holding the lock and releasing it at this position?
Yes, it was definitely possible. In v2 I implemented it the way Daniel suggested, I think it's a decent compromise. Thanks for taking a look at this!
z
On Thu, Apr 08, 2021 at 01:22:45PM -0400, Zack Rusin wrote:
Quite often it's a little hard to tell if reservations are already held in code paths that unpin bo's. While our pinning/unpinning code should be more explicit that requires a substential amount of work so instead we can avoid the issues by making sure we try to reserve before unpinning. Because we unpin those bo's only on destruction/error paths just that check tells us if we're already reserved or not and allows to cleanly unpin.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..03bef9c17e56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) return srf; }
+/*
- vmw_bo_unpin_safe - currently pinning requires a reservation to be held
- but sometimes it's hard to tell if we're in a callback whose parent
- is already holding a reservation, to avoid deadloacks we have to try
- to get a reservation explicitly to also try to avoid messing up the
- internal ttm lru bo list
- */
+static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo) +{
- bool locked = dma_resv_trylock(bo->base.resv);
- ttm_bo_unpin(bo);
- if (locked)
dma_resv_unlock(bo->base.resv);
+}
static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) { struct vmw_buffer_object *tmp_buf = *buf; @@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { if (tmp_buf->base.pin_count > 0)
ttm_bo_unpin(&tmp_buf->base);
vmw_bo_unpin_safe(&tmp_buf->base);
So in the unreference callback I understand it might be tricky and you need this, but do all the others below really don't know whether the bo is locked or not?
Also _trylock is a bit much yolo locking here, I'd minimally put a comment there that we don't actually care about races, it's just to shut up ttm locking checks. Whether that's true or not is another question I think.
And if it's just this case here, maybe inline the trylock, and for the others do a vmw_bo_unpin_unlocked which unconditionally grabs the lock? -Daniel
ttm_bo_put(&tmp_buf->base);
} } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index a0b53141dded..23ffeb2dd6e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -277,7 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, &batch->otables[i]); }
- ttm_bo_unpin(batch->otable_bo);
- vmw_bo_unpin_safe(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; return ret;
@@ -343,7 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, vmw_bo_fence_single(bo, NULL); ttm_bo_unreserve(bo);
- ttm_bo_unpin(batch->otable_bo);
- vmw_bo_unpin_safe(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL;
} @@ -530,7 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, void vmw_mob_destroy(struct vmw_mob *mob) { if (mob->pt_bo) {
ttm_bo_unpin(mob->pt_bo);
ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; }vmw_bo_unpin_safe(mob->pt_bo);
@@ -646,7 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); if (pt_set_up) {
ttm_bo_unpin(mob->pt_bo);
ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; }vmw_bo_unpin_safe(mob->pt_bo);
-- 2.27.0
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 4/9/21 3:40 AM, Daniel Vetter wrote:
On Thu, Apr 08, 2021 at 01:22:45PM -0400, Zack Rusin wrote:
Quite often it's a little hard to tell if reservations are already held in code paths that unpin bo's. While our pinning/unpinning code should be more explicit that requires a substential amount of work so instead we can avoid the issues by making sure we try to reserve before unpinning. Because we unpin those bo's only on destruction/error paths just that check tells us if we're already reserved or not and allows to cleanly unpin.
Reviewed-by: Martin Krastev krastevm@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin zackr@vmware.com
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..03bef9c17e56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) return srf; }
+/*
- vmw_bo_unpin_safe - currently pinning requires a reservation to be held
- but sometimes it's hard to tell if we're in a callback whose parent
- is already holding a reservation, to avoid deadloacks we have to try
- to get a reservation explicitly to also try to avoid messing up the
- internal ttm lru bo list
- */
+static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo) +{
- bool locked = dma_resv_trylock(bo->base.resv);
- ttm_bo_unpin(bo);
- if (locked)
dma_resv_unlock(bo->base.resv);
+}
- static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) { struct vmw_buffer_object *tmp_buf = *buf;
@@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { if (tmp_buf->base.pin_count > 0)
ttm_bo_unpin(&tmp_buf->base);
vmw_bo_unpin_safe(&tmp_buf->base);
So in the unreference callback I understand it might be tricky and you need this, but do all the others below really don't know whether the bo is locked or not?
TBH, I just liked having all those paths going through the same functions. I agree that it wasn't really correct or particularly graceful.
Also _trylock is a bit much yolo locking here, I'd minimally put a comment there that we don't actually care about races, it's just to shut up ttm locking checks. Whether that's true or not is another question I think.
And if it's just this case here, maybe inline the trylock, and for the others do a vmw_bo_unpin_unlocked which unconditionally grabs the lock?
Fair enough, I think that's a good suggestion, so I went ahead and did just that.
z
dri-devel@lists.freedesktop.org