The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY;
+ dma_resv_assert_held(bo->base.resv); + + if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + } + if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
- ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); out:
/* @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo); + + /* Don't break locking rules. */ + WARN_ON(ret == -EBUSY); return ret; }
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 460953dcad11..eaa7487ae404 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { list_for_each_entry(bo, &man->lru[j], lru) { - uint32_t num_pages; + pgoff_t num_pages;
- if (!bo->ttm || - bo->ttm->page_flags & TTM_PAGE_FLAG_SG || - bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + if (!READ_ONCE(bo->ttm)) continue;
- num_pages = bo->ttm->num_pages; + num_pages = bo->base.size >> PAGE_SHIFT; ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ if (!ret)
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY;
- dma_resv_assert_held(bo->base.resv);
- if (!bo->ttm ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
if (locked)
dma_resv_unlock(bo->base.resv);
return -EBUSY;
- }
- if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv);
@@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
- ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
- if (ttm_tt_is_populated(bo->ttm))
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
Why not move that to the check above as well?
Christian.
out:
/* @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo);
- /* Don't break locking rules. */
- WARN_ON(ret == -EBUSY); return ret; }
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 460953dcad11..eaa7487ae404 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { list_for_each_entry(bo, &man->lru[j], lru) {
uint32_t num_pages;
pgoff_t num_pages;
if (!bo->ttm ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
if (!READ_ONCE(bo->ttm)) continue;
num_pages = bo->ttm->num_pages;
num_pages = bo->base.size >> PAGE_SHIFT; ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ if (!ret)
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY; + dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); - ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re-added,
Why not move that to the check above as well?
Because the BO may become unpopulated in swap_notify(), i915, like vmwgfx, sometimes sets up gpu bindings from system, and when we get a notification from user-space that those are purgeable, we don't want to purge immediately but wait for a potential swapout.
/Thomas
Christian.
out: /* @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo);
+ /* Don't break locking rules. */ + WARN_ON(ret == -EBUSY); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 460953dcad11..eaa7487ae404 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { list_for_each_entry(bo, &man->lru[j], lru) { - uint32_t num_pages; + pgoff_t num_pages; - if (!bo->ttm || - bo->ttm->page_flags & TTM_PAGE_FLAG_SG || - bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + if (!READ_ONCE(bo->ttm)) continue; - num_pages = bo->ttm->num_pages; + num_pages = bo->base.size >> PAGE_SHIFT; ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ if (!ret)
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström
thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY; + dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); - ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re-added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
/Thomas
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström
thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY;
+ dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
- ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re-added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
Christian.
/Thomas
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström
thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY; + dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); - ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re- added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
/Thomas
Christian.
/Thomas
Am 27.05.21 um 17:51 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström
thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY;
+ dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
- ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re- added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
I want to avoid removing BOs from the LRU as much as possible since we forgot on multiple places that we want to re-add them.
Conceptual I think the swapped BOs should have a separate memory domain, this way we can ignore them cleanly when swapping things out.
Going to pick this patch up, modifying it a bit more and then pushing it to drm-misc-fixes for upstreaming.
Thanks, Christian.
/Thomas
Christian.
/Thomas
On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
Am 27.05.21 um 17:51 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström: > The swapping code was dereference bo->ttm pointers > without > having > the > dma-resv lock held. Also it might try to swap out > unpopulated > bos. > > Fix this by moving the bo->ttm dereference until we have > the > reservation > lock. Check that the ttm_tt is populated after the > swap_notify > callback. > > Signed-off-by: Thomas Hellström > thomas.hellstrom@linux.intel.com > --- > drivers/gpu/drm/ttm/ttm_bo.c | 16 > +++++++++++++++- > drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- > 2 files changed, 18 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c > b/drivers/gpu/drm/ttm/ttm_bo.c > index 9f53506a82fc..86213d37657b 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct > ttm_buffer_object > *bo, struct ttm_operation_ctx *ctx, > if (!ttm_bo_evict_swapout_allowable(bo, ctx, > &place, > &locked, NULL)) > return -EBUSY; > > + dma_resv_assert_held(bo->base.resv); > + > + if (!bo->ttm || > + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || > + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) > { > + if (locked) > + dma_resv_unlock(bo->base.resv); > + return -EBUSY; > + } > + > if (!ttm_bo_get_unless_zero(bo)) { > if (locked) > dma_resv_unlock(bo->base.resv); > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct > ttm_buffer_object > *bo, struct ttm_operation_ctx *ctx, > if (bo->bdev->funcs->swap_notify) > bo->bdev->funcs->swap_notify(bo); > > - ret = ttm_tt_swapout(bo->bdev, bo->ttm, > gfp_flags); > + if (ttm_tt_is_populated(bo->ttm)) > + ret = ttm_tt_swapout(bo->bdev, bo->ttm, > gfp_flags); Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re- added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
I want to avoid removing BOs from the LRU as much as possible since we forgot on multiple places that we want to re-add them.
Conceptual I think the swapped BOs should have a separate memory domain, this way we can ignore them cleanly when swapping things out.
Yes, that would of course work as well. Keeping them on the system LRU is IMO highly undesirable.
Going to pick this patch up, modifying it a bit more and then pushing it to drm-misc-fixes for upstreaming.
OK, I dropped the TTM fix for the purge-in-swap-notify from the i915 series, hoping that the reworked variant of this patch lands first.
Thanks, Thomas
Thanks, Christian.
/Thomas
Christian.
/Thomas
Am 28.05.21 um 09:33 schrieb Thomas Hellström:
On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
Am 27.05.21 um 17:51 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote: > Am 27.05.21 um 16:19 schrieb Thomas Hellström: >> The swapping code was dereference bo->ttm pointers >> without >> having >> the >> dma-resv lock held. Also it might try to swap out >> unpopulated >> bos. >> >> Fix this by moving the bo->ttm dereference until we have >> the >> reservation >> lock. Check that the ttm_tt is populated after the >> swap_notify >> callback. >> >> Signed-off-by: Thomas Hellström >> thomas.hellstrom@linux.intel.com >> --- >> drivers/gpu/drm/ttm/ttm_bo.c | 16 >> +++++++++++++++- >> drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- >> 2 files changed, 18 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c >> b/drivers/gpu/drm/ttm/ttm_bo.c >> index 9f53506a82fc..86213d37657b 100644 >> --- a/drivers/gpu/drm/ttm/ttm_bo.c >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct >> ttm_buffer_object >> *bo, struct ttm_operation_ctx *ctx, >> if (!ttm_bo_evict_swapout_allowable(bo, ctx, >> &place, >> &locked, NULL)) >> return -EBUSY; >> >> + dma_resv_assert_held(bo->base.resv); >> + >> + if (!bo->ttm || >> + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || >> + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) >> { >> + if (locked) >> + dma_resv_unlock(bo->base.resv); >> + return -EBUSY; >> + } >> + >> if (!ttm_bo_get_unless_zero(bo)) { >> if (locked) >> dma_resv_unlock(bo->base.resv); >> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct >> ttm_buffer_object >> *bo, struct ttm_operation_ctx *ctx, >> if (bo->bdev->funcs->swap_notify) >> bo->bdev->funcs->swap_notify(bo); >> >> - ret = ttm_tt_swapout(bo->bdev, bo->ttm, >> gfp_flags); >> + if (ttm_tt_is_populated(bo->ttm)) >> + ret = ttm_tt_swapout(bo->bdev, bo->ttm, >> gfp_flags); > Exactly that is what I won't recommend. We would try to > swap > out > the > same BO over and over again with that. But we wouldn't since the BO is taken off the LRU and never re- added,
In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
I want to avoid removing BOs from the LRU as much as possible since we forgot on multiple places that we want to re-add them.
Conceptual I think the swapped BOs should have a separate memory domain, this way we can ignore them cleanly when swapping things out.
Yes, that would of course work as well. Keeping them on the system LRU is IMO highly undesirable.
Going to pick this patch up, modifying it a bit more and then pushing it to drm-misc-fixes for upstreaming.
OK, I dropped the TTM fix for the purge-in-swap-notify from the i915 series, hoping that the reworked variant of this patch lands first.
You will still need to add the second ttm_tt_populated() check since I dropped that for the back which I want to push to -fixes.
Regards, Christian.
Thanks, Thomas
Thanks, Christian.
/Thomas
Christian.
/Thomas
On 5/28/21 4:10 PM, Christian König wrote:
Am 28.05.21 um 09:33 schrieb Thomas Hellström:
On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
Am 27.05.21 um 17:51 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote: > On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote: >> Am 27.05.21 um 16:19 schrieb Thomas Hellström: >>> The swapping code was dereference bo->ttm pointers >>> without >>> having >>> the >>> dma-resv lock held. Also it might try to swap out >>> unpopulated >>> bos. >>> >>> Fix this by moving the bo->ttm dereference until we have >>> the >>> reservation >>> lock. Check that the ttm_tt is populated after the >>> swap_notify >>> callback. >>> >>> Signed-off-by: Thomas Hellström >>> thomas.hellstrom@linux.intel.com >>> --- >>> drivers/gpu/drm/ttm/ttm_bo.c | 16 >>> +++++++++++++++- >>> drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- >>> 2 files changed, 18 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c >>> b/drivers/gpu/drm/ttm/ttm_bo.c >>> index 9f53506a82fc..86213d37657b 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_bo.c >>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct >>> ttm_buffer_object >>> *bo, struct ttm_operation_ctx *ctx, >>> if (!ttm_bo_evict_swapout_allowable(bo, ctx, >>> &place, >>> &locked, NULL)) >>> return -EBUSY; >>> + dma_resv_assert_held(bo->base.resv); >>> + >>> + if (!bo->ttm || >>> + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || >>> + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) >>> { >>> + if (locked) >>> + dma_resv_unlock(bo->base.resv); >>> + return -EBUSY; >>> + } >>> + >>> if (!ttm_bo_get_unless_zero(bo)) { >>> if (locked) >>> dma_resv_unlock(bo->base.resv); >>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct >>> ttm_buffer_object >>> *bo, struct ttm_operation_ctx *ctx, >>> if (bo->bdev->funcs->swap_notify) >>> bo->bdev->funcs->swap_notify(bo); >>> - ret = ttm_tt_swapout(bo->bdev, bo->ttm, >>> gfp_flags); >>> + if (ttm_tt_is_populated(bo->ttm)) >>> + ret = ttm_tt_swapout(bo->bdev, bo->ttm, >>> gfp_flags); >> Exactly that is what I won't recommend. We would try to >> swap >> out >> the >> same BO over and over again with that. > But we wouldn't since the BO is taken off the LRU and never > re- > added, > > In fact, we'd probably might want to take the !bo->ttm bos off the LRU as well..
No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
I want to avoid removing BOs from the LRU as much as possible since we forgot on multiple places that we want to re-add them.
Conceptual I think the swapped BOs should have a separate memory domain, this way we can ignore them cleanly when swapping things out.
Yes, that would of course work as well. Keeping them on the system LRU is IMO highly undesirable.
Going to pick this patch up, modifying it a bit more and then pushing it to drm-misc-fixes for upstreaming.
OK, I dropped the TTM fix for the purge-in-swap-notify from the i915 series, hoping that the reworked variant of this patch lands first.
You will still need to add the second ttm_tt_populated() check since I dropped that for the back which I want to push to -fixes.
Regards, Christian.
OK, great. then you have my S-O-B on this patch.
BTW that original patch that added the ttm_tt_is_populated() was considered "LGTM" by you, except for this ttm_tt_is_populated(). So do I have an Acked-by: on that now?
That is
https://patchwork.freedesktop.org/patch/435833/?series=90681&rev=2
plus the check added?
Thanks,
Thomas
Thanks, Thomas
Thanks, Christian.
/Thomas
Christian.
/Thomas
Am 28.05.21 um 16:17 schrieb Thomas Hellström:
On 5/28/21 4:10 PM, Christian König wrote:
Am 28.05.21 um 09:33 schrieb Thomas Hellström:
On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
Am 27.05.21 um 17:51 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
Am 27.05.21 um 17:05 schrieb Thomas Hellström: > On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote: >> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote: >>> Am 27.05.21 um 16:19 schrieb Thomas Hellström: >>>> The swapping code was dereference bo->ttm pointers >>>> without >>>> having >>>> the >>>> dma-resv lock held. Also it might try to swap out >>>> unpopulated >>>> bos. >>>> >>>> Fix this by moving the bo->ttm dereference until we have >>>> the >>>> reservation >>>> lock. Check that the ttm_tt is populated after the >>>> swap_notify >>>> callback. >>>> >>>> Signed-off-by: Thomas Hellström >>>> thomas.hellstrom@linux.intel.com >>>> --- >>>> drivers/gpu/drm/ttm/ttm_bo.c | 16 >>>> +++++++++++++++- >>>> drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- >>>> 2 files changed, 18 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c >>>> b/drivers/gpu/drm/ttm/ttm_bo.c >>>> index 9f53506a82fc..86213d37657b 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct >>>> ttm_buffer_object >>>> *bo, struct ttm_operation_ctx *ctx, >>>> if (!ttm_bo_evict_swapout_allowable(bo, ctx, >>>> &place, >>>> &locked, NULL)) >>>> return -EBUSY; >>>> + dma_resv_assert_held(bo->base.resv); >>>> + >>>> + if (!bo->ttm || >>>> + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || >>>> + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) >>>> { >>>> + if (locked) >>>> + dma_resv_unlock(bo->base.resv); >>>> + return -EBUSY; >>>> + } >>>> + >>>> if (!ttm_bo_get_unless_zero(bo)) { >>>> if (locked) >>>> dma_resv_unlock(bo->base.resv); >>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct >>>> ttm_buffer_object >>>> *bo, struct ttm_operation_ctx *ctx, >>>> if (bo->bdev->funcs->swap_notify) >>>> bo->bdev->funcs->swap_notify(bo); >>>> - ret = ttm_tt_swapout(bo->bdev, bo->ttm, >>>> gfp_flags); >>>> + if (ttm_tt_is_populated(bo->ttm)) >>>> + ret = ttm_tt_swapout(bo->bdev, bo->ttm, >>>> gfp_flags); >>> Exactly that is what I won't recommend. We would try to >>> swap >>> out >>> the >>> same BO over and over again with that. >> But we wouldn't since the BO is taken off the LRU and never >> re- >> added, >> >> > In fact, we'd probably might want to take the !bo->ttm bos off > the > LRU > as well.. No, we don't want to take any BOs of the LRU unless they are pinned.
Adding a TT object or populating it doesn't necessarily put the BO back to the LRU.
OK, but swapped bos are also taken off the LRU list so these unpopulated bos are just taking the same path. Only difference to swapped is that they don't get read back on re-populate, but typically cleared.
But what would be the point of keeping swapped-out bos on the LRU list?, particularly when we're iterating under a spinlock? Shouldn't we try to re-add to LRU (if not already on an LRU) just before populating? There aren't really that many calls in core TTM.
I want to avoid removing BOs from the LRU as much as possible since we forgot on multiple places that we want to re-add them.
Conceptual I think the swapped BOs should have a separate memory domain, this way we can ignore them cleanly when swapping things out.
Yes, that would of course work as well. Keeping them on the system LRU is IMO highly undesirable.
Going to pick this patch up, modifying it a bit more and then pushing it to drm-misc-fixes for upstreaming.
OK, I dropped the TTM fix for the purge-in-swap-notify from the i915 series, hoping that the reworked variant of this patch lands first.
You will still need to add the second ttm_tt_populated() check since I dropped that for the back which I want to push to -fixes.
Regards, Christian.
OK, great. then you have my S-O-B on this patch.
BTW that original patch that added the ttm_tt_is_populated() was considered "LGTM" by you, except for this ttm_tt_is_populated(). So do I have an Acked-by: on that now?
That is
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork....
plus the check added?
Yeah, sure.
Christian.
Thanks,
Thomas
Thanks, Thomas
Thanks, Christian.
/Thomas
Christian.
> /Thomas >
Am 27.05.21 um 17:01 schrieb Thomas Hellström:
On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
Am 27.05.21 um 16:19 schrieb Thomas Hellström:
The swapping code was dereference bo->ttm pointers without having the dma-resv lock held. Also it might try to swap out unpopulated bos.
Fix this by moving the bo->ttm dereference until we have the reservation lock. Check that the ttm_tt is populated after the swap_notify callback.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/ttm/ttm_bo.c | 16 +++++++++++++++- drivers/gpu/drm/ttm/ttm_device.c | 8 +++----- 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9f53506a82fc..86213d37657b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY;
+ dma_resv_assert_held(bo->base.resv);
+ if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + }
if (!ttm_bo_get_unless_zero(bo)) { if (locked) dma_resv_unlock(bo->base.resv); @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
- ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
Exactly that is what I won't recommend. We would try to swap out the same BO over and over again with that.
But we wouldn't since the BO is taken off the LRU and never re-added,
Well then that would be a bug in itself.
Why not move that to the check above as well?
Because the BO may become unpopulated in swap_notify(), i915, like vmwgfx, sometimes sets up gpu bindings from system, and when we get a notification from user-space that those are purgeable, we don't want to purge immediately but wait for a potential swapout.
Uff, good point. But then we need to check that at both locations I think.
Because populating the TT object currently doesn't put the BO back on the LRU eventually.
Christian.
/Thomas
Christian.
out:
/* @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo);
+ /* Don't break locking rules. */ + WARN_ON(ret == -EBUSY); return ret; }
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 460953dcad11..eaa7487ae404 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { list_for_each_entry(bo, &man->lru[j], lru) { - uint32_t num_pages; + pgoff_t num_pages;
- if (!bo->ttm || - bo->ttm->page_flags & TTM_PAGE_FLAG_SG || - bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + if (!READ_ONCE(bo->ttm)) continue;
- num_pages = bo->ttm->num_pages; + num_pages = bo->base.size >> PAGE_SHIFT; ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ if (!ret)
dri-devel@lists.freedesktop.org