On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote:
Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex") moves the vma binding to dma_fence_work, but dma_fence_work has signalling fence semantics.
On braswell, we can end up calling stop_machine() inside fence_work, causing a splat because memory allocation is not allowed inside stop_machine().
This patch does not fix that lockdep splat yet, but will allow the next patch to remove it.
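An illustrative sketch of the inversion (not part of the patch; the function names below are made-up stand-ins, though on braswell the GGTT PTE writes really do go through stop_machine() via bxt_vtd_ggtt_insert_entries__BKL):

#include <linux/dma-fence.h>
#include <linux/stop_machine.h>

static int write_ptes(void *arg)
{
        /* stand-in for the GGTT PTE update run under stop_machine() */
        return 0;
}

static void bind_from_fence_work(void)
{
        /* dma_fence_work runs its callback with fence signalling
         * semantics... */
        bool cookie = dma_fence_begin_signalling();

        /*
         * ...but stop_machine() drags in lock dependencies (e.g.
         * cpu_hotplug_lock) that lockdep also sees on allocation and
         * reclaim paths, and reclaim must be allowed to wait on
         * fences, so lockdep reports the cycle.
         */
        stop_machine(write_ptes, NULL, NULL);

        dma_fence_end_signalling(cookie);
}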
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Thomas pointed out that this removes pipelined pte writing for both ggtt and ppgtt, and that's a bit much. So I need to retract my ack here. -Daniel
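(For context on the pipelining concern: the removed worker chained the PTE writes behind the vma's previous exclusive fence instead of performing them inline, roughly this fragment from the deleted i915_vma.c hunk below:)

        prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
        if (prev) {
                __i915_sw_fence_await_dma_fence(&work->base.chain,
                                                prev, &work->cb);
                dma_fence_put(prev);
        }
        work->base.dma.error = 0; /* enable the queue_work() */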
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
 drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
 drivers/gpu/drm/i915/i915_gem.c            |   6 -
 drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
 drivers/gpu/drm/i915/i915_vma.h            |   5 +-
 drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
 9 files changed, 21 insertions(+), 178 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index b0597de206de..ec04df0a3b89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
 	if (!drm_mm_node_allocated(&ggtt->error_capture))
 		return;
 
-	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
-		return; /* beware stop_machine() inversion */
-
 	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
 	mutex_lock(&ggtt->error_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index e08dff376339..0c5a9a767849 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
 	ppgtt->base.vm.top = 1;
 
-	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..92f8a23e66cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 		goto err_free_pd;
 	}
 
-	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 670c1271e7d5..e1ec6edae1fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 
 	ggtt->alias = ppgtt;
-	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
-		ggtt->vm.bind_async_flags =
-			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e67e34e17913..d9d2ca8b4b61 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -230,8 +230,6 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
-	unsigned int bind_async_flags;
-
 	/*
 	 * Each active user context has its own address space (in full-ppgtt).
 	 * Since the vm may be shared between multiple contexts, we count how
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb..4639c47c038b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
 		mutex_unlock(&ggtt->vm.mutex);
 	}
 
-	ret = i915_vma_wait_for_bind(vma);
-	if (ret) {
-		i915_vma_unpin(vma);
-		return ERR_PTR(ret);
-	}
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 07490db51cdc..e4b2590d41ce 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-struct i915_vma_work {
-	struct dma_fence_work base;
-	struct i915_address_space *vm;
-	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
-	struct drm_i915_gem_object *pinned;
-	struct i915_sw_dma_fence_cb cb;
-	enum i915_cache_level cache_level;
-	unsigned int flags;
-};
-
-static int __vma_bind(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
-
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma, vw->cache_level, vw->flags);
-
-	return 0;
-}
-
-static void __vma_release(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-
-	if (vw->pinned) {
-		__i915_gem_object_unpin_pages(vw->pinned);
-		i915_gem_object_put(vw->pinned);
-	}
-
-	i915_vm_free_pt_stash(vw->vm, &vw->stash);
-	i915_vm_put(vw->vm);
-}
-
-static const struct dma_fence_work_ops bind_ops = {
-	.name = "bind",
-	.work = __vma_bind,
-	.release = __vma_release,
-};
-
-struct i915_vma_work *i915_vma_work(void)
-{
-	struct i915_vma_work *vw;
-
-	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
-	if (!vw)
-		return NULL;
-
-	dma_fence_work_init(&vw->base, &bind_ops);
-	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
-
-	return vw;
-}
-
-int i915_vma_wait_for_bind(struct i915_vma *vma)
-{
-	int err = 0;
-
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
-	}
-
-	return err;
-}
-
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
  */
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
-		  u32 flags,
-		  struct i915_vma_work *work)
+		  u32 flags, struct i915_vm_pt_stash *stash)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	if (work && bind_flags & vma->vm->bind_async_flags) {
-		struct dma_fence *prev;
-
-		work->vma = vma;
-		work->cache_level = cache_level;
-		work->flags = bind_flags;
-
-		/*
-		 * Note we only want to chain up to the migration fence on
-		 * the pages (not the object itself). As we don't track that,
-		 * yet, we have to use the exclusive fence instead.
-		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
-		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
-			__i915_sw_fence_await_dma_fence(&work->base.chain,
-							prev,
-							&work->cb);
-			dma_fence_put(prev);
-		}
-
-		work->base.dma.error = 0; /* enable the queue_work() */
-
-		if (vma->obj) {
-			__i915_gem_object_pin_pages(vma->obj);
-			work->pinned = i915_gem_object_get(vma->obj);
-		}
-	} else {
-		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
-	}
+	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
-	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
-		return true;
-
 	if (vma->node.size < size)
 		return true;
@@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	if (unlikely(flags & ~bound))
 		return false;
 
-	if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+	if (unlikely(bound & I915_VMA_OVERFLOW))
 		return false;
 
 	if (!(bound & I915_VMA_PIN_MASK))
@@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	 */
 	mutex_lock(&vma->vm->mutex);
 	do {
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+		if (unlikely(bound & I915_VMA_OVERFLOW)) {
 			pinned = false;
 			break;
@@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
 int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
-	struct i915_vma_work *work = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
+	struct i915_vm_pt_stash stash = {};
 
 #ifdef CONFIG_PROVE_LOCKING
 	if (debug_locks && !WARN_ON(!ww) && vma->resv)
@@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
-		/* lock VM */
-		err = i915_vm_lock_objects(vma->vm, ww);
+	if (vma->vm->allocate_va_range) {
+		err = i915_vm_alloc_pt_stash(vma->vm,
+					     &stash,
+					     vma->size);
 		if (err)
 			goto err_rpm;
 
-		work = i915_vma_work();
-		if (!work) {
-			err = -ENOMEM;
+		err = i915_vm_lock_objects(vma->vm, ww);
+		if (err)
 			goto err_rpm;
-		}
 
-		work->vm = i915_vm_get(vma->vm);
-
-		/* Allocate enough page directories to used PTE */
-		if (vma->vm->allocate_va_range) {
-			err = i915_vm_alloc_pt_stash(vma->vm,
-						     &work->stash,
-						     vma->size);
-			if (err)
-				goto err_fence;
-
-			err = i915_vm_pin_pt_stash(vma->vm,
-						   &work->stash);
-			if (err)
-				goto err_fence;
-		}
+		err = i915_vm_pin_pt_stash(vma->vm,
+					   &stash);
+		if (err)
+			goto err_rpm;
 	}
 
 	/*
@@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
 					      !(flags & PIN_GLOBAL));
 	if (err)
-		goto err_fence;
+		goto err_rpm;
 
 	/* No more allocations allowed now we hold vm->mutex */
@@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	bound = atomic_read(&vma->flags);
-	if (unlikely(bound & I915_VMA_ERROR)) {
-		err = -ENOMEM;
-		goto err_unlock;
-	}
-
 	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
 		err = -EAGAIN; /* pins are meant to be fairly temporary */
 		goto err_unlock;
@@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj ? vma->obj->cache_level : 0,
-			    flags, work);
+			    flags, &stash);
 	if (err)
 		goto err_remove;
@@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	i915_active_release(&vma->active);
 err_unlock:
 	mutex_unlock(&vma->vm->mutex);
-err_fence:
-	if (work)
-		dma_fence_work_commit_imm(&work->base);
 err_rpm:
+	i915_vm_free_pt_stash(vma->vm, &stash);
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 	vma_put_pages(vma);
@@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
 		else
 			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-		if (err != -ENOSPC) {
-			if (!err) {
-				err = i915_vma_wait_for_bind(vma);
-				if (err)
-					i915_vma_unpin(vma);
-			}
-
+		if (err != -ENOSPC)
 			return err;
-		}
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
@@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma->vm, vma);
 	}
-	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
 	i915_vma_detach(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8df784a026d2..d1d0fc76ef99 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-struct i915_vma_work *i915_vma_work(void);
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work);
+		  struct i915_vm_pt_stash *stash);
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
-int i915_vma_wait_for_bind(struct i915_vma *vma);
-
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 6b1bfa230b82..82868a755a96 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -240,9 +240,6 @@ struct i915_vma {
 #define I915_VMA_ALLOC_BIT	12
 
-#define I915_VMA_ERROR_BIT	13
-#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
-
 #define I915_VMA_GGTT_BIT	14
 #define I915_VMA_CAN_FENCE_BIT	15
 #define I915_VMA_USERFAULT_BIT	16
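For reference, the resulting pin path in i915_vma_pin_ww() now reads roughly like this (condensed from the hunks above; the ww/runtime-pm plumbing and error labels are elided, so treat it as a sketch, not the literal code):

	struct i915_vm_pt_stash stash = {};

	/* Allocate and pin the page directories up front, outside
	 * vm->mutex, so nothing allocates once the mutex is held. */
	if (vma->vm->allocate_va_range) {
		err = i915_vm_alloc_pt_stash(vma->vm, &stash, vma->size);
		if (!err)
			err = i915_vm_lock_objects(vma->vm, ww);
		if (!err)
			err = i915_vm_pin_pt_stash(vma->vm, &stash);
	}

	/* PTEs are written synchronously; no dma_fence_work involved. */
	if (!err)
		err = mutex_lock_interruptible_nested(&vma->vm->mutex,
						      !(flags & PIN_GLOBAL));
	if (!err) {
		err = i915_vma_bind(vma,
				    vma->obj ? vma->obj->cache_level : 0,
				    flags, &stash);
		mutex_unlock(&vma->vm->mutex);
	}

	i915_vm_free_pt_stash(vma->vm, &stash);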
-- 
2.31.0