On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote:
Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex") moves the vma binding to dma_fence_work, but dma_fence_work has signalling fence semantics.
On braswell, we can end up calling stop_machine() inside fence_work, causing a splat because memory allocation is not allowed inside stop_machine().
This patch does not fix that lockdep splat yet, but will allow the next patch to remove it.
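An illustrative sketch of the inversion (not part of the patch; the function names below are made-up stand-ins, though on braswell the GGTT PTE writes really do go through stop_machine() via bxt_vtd_ggtt_insert_entries__BKL):

#include <linux/dma-fence.h>
#include <linux/stop_machine.h>

static int write_ptes(void *arg)
{
        /* stand-in for the GGTT PTE update run under stop_machine() */
        return 0;
}

static void bind_from_fence_work(void)
{
        /* dma_fence_work runs its callback with fence signalling
         * semantics... */
        bool cookie = dma_fence_begin_signalling();

        /*
         * ...but stop_machine() drags in lock dependencies (e.g.
         * cpu_hotplug_lock) that lockdep also sees on allocation and
         * reclaim paths, and reclaim must be allowed to wait on
         * fences, so lockdep reports the cycle.
         */
        stop_machine(write_ptes, NULL, NULL);

        dma_fence_end_signalling(cookie);
}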
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Thomas pointed out that this removes pipelined pte writing for both ggtt and ppgtt, and that's a bit much. So I need to retract my ack here. -Daniel
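(For context on the pipelining concern: the removed worker chained the PTE writes behind the vma's previous exclusive fence instead of performing them inline, roughly this fragment from the deleted i915_vma.c hunk below:)

        prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
        if (prev) {
                __i915_sw_fence_await_dma_fence(&work->base.chain,
                                                prev, &work->cb);
                dma_fence_put(prev);
        }
        work->base.dma.error = 0; /* enable the queue_work() */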
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
 drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
 drivers/gpu/drm/i915/i915_gem.c            |   6 -
 drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
 drivers/gpu/drm/i915/i915_vma.h            |   5 +-
 drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
 9 files changed, 21 insertions(+), 178 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index b0597de206de..ec04df0a3b89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
 	if (!drm_mm_node_allocated(&ggtt->error_capture))
 		return;
 
-	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
-		return; /* beware stop_machine() inversion */
-
 	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
 	mutex_lock(&ggtt->error_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index e08dff376339..0c5a9a767849 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
 	ppgtt->base.vm.top = 1;
 
-	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..92f8a23e66cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 		goto err_free_pd;
 	}
 
-	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 670c1271e7d5..e1ec6edae1fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 
 	ggtt->alias = ppgtt;
-	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
-		ggtt->vm.bind_async_flags =
-			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e67e34e17913..d9d2ca8b4b61 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -230,8 +230,6 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
-	unsigned int bind_async_flags;
-
 	/*
 	 * Each active user context has its own address space (in full-ppgtt).
 	 * Since the vm may be shared between multiple contexts, we count how
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb..4639c47c038b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
 		mutex_unlock(&ggtt->vm.mutex);
 	}
 
-	ret = i915_vma_wait_for_bind(vma);
-	if (ret) {
-		i915_vma_unpin(vma);
-		return ERR_PTR(ret);
-	}
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 07490db51cdc..e4b2590d41ce 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-struct i915_vma_work {
-	struct dma_fence_work base;
-	struct i915_address_space *vm;
-	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
-	struct drm_i915_gem_object *pinned;
-	struct i915_sw_dma_fence_cb cb;
-	enum i915_cache_level cache_level;
-	unsigned int flags;
-};
-
-static int __vma_bind(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
-
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma, vw->cache_level, vw->flags);
-
-	return 0;
-}
-
-static void __vma_release(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-
-	if (vw->pinned) {
-		__i915_gem_object_unpin_pages(vw->pinned);
-		i915_gem_object_put(vw->pinned);
-	}
-
-	i915_vm_free_pt_stash(vw->vm, &vw->stash);
-	i915_vm_put(vw->vm);
-}
-
-static const struct dma_fence_work_ops bind_ops = {
-	.name = "bind",
-	.work = __vma_bind,
-	.release = __vma_release,
-};
-
-struct i915_vma_work *i915_vma_work(void)
-{
-	struct i915_vma_work *vw;
-
-	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
-	if (!vw)
-		return NULL;
-
-	dma_fence_work_init(&vw->base, &bind_ops);
-	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
-
-	return vw;
-}
-
-int i915_vma_wait_for_bind(struct i915_vma *vma)
-{
-	int err = 0;
-
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
-	}
-
-	return err;
-}
-
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
  */
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
-		  u32 flags,
-		  struct i915_vma_work *work)
+		  u32 flags, struct i915_vm_pt_stash *stash)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	if (work && bind_flags & vma->vm->bind_async_flags) {
-		struct dma_fence *prev;
-
-		work->vma = vma;
-		work->cache_level = cache_level;
-		work->flags = bind_flags;
-
-		/*
-		 * Note we only want to chain up to the migration fence on
-		 * the pages (not the object itself). As we don't track that,
-		 * yet, we have to use the exclusive fence instead.
-		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
-		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
-			__i915_sw_fence_await_dma_fence(&work->base.chain,
-							prev,
-							&work->cb);
-			dma_fence_put(prev);
-		}
-
-		work->base.dma.error = 0; /* enable the queue_work() */
-
-		if (vma->obj) {
-			__i915_gem_object_pin_pages(vma->obj);
-			work->pinned = i915_gem_object_get(vma->obj);
-		}
-	} else {
-		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
-	}
+	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
-	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
-		return true;
-
 	if (vma->node.size < size)
 		return true;
@@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	if (unlikely(flags & ~bound))
 		return false;
 
-	if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+	if (unlikely(bound & I915_VMA_OVERFLOW))
 		return false;
 
 	if (!(bound & I915_VMA_PIN_MASK))
@@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	 */
 	mutex_lock(&vma->vm->mutex);
 	do {
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+		if (unlikely(bound & I915_VMA_OVERFLOW)) {
 			pinned = false;
 			break;
@@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
 int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
-	struct i915_vma_work *work = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
+	struct i915_vm_pt_stash stash = {};
 
 #ifdef CONFIG_PROVE_LOCKING
 	if (debug_locks && !WARN_ON(!ww) && vma->resv)
@@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
-		/* lock VM */
-		err = i915_vm_lock_objects(vma->vm, ww);
+	if (vma->vm->allocate_va_range) {
+		err = i915_vm_alloc_pt_stash(vma->vm,
+					     &stash,
+					     vma->size);
 		if (err)
 			goto err_rpm;
 
-		work = i915_vma_work();
-		if (!work) {
-			err = -ENOMEM;
+		err = i915_vm_lock_objects(vma->vm, ww);
+		if (err)
 			goto err_rpm;
-		}
 
-		work->vm = i915_vm_get(vma->vm);
-
-		/* Allocate enough page directories to used PTE */
-		if (vma->vm->allocate_va_range) {
-			err = i915_vm_alloc_pt_stash(vma->vm,
-						     &work->stash,
-						     vma->size);
-			if (err)
-				goto err_fence;
-
-			err = i915_vm_pin_pt_stash(vma->vm,
-						   &work->stash);
-			if (err)
-				goto err_fence;
-		}
+		err = i915_vm_pin_pt_stash(vma->vm,
+					   &stash);
+		if (err)
+			goto err_rpm;
 	}
 
 	/*
@@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
 					      !(flags & PIN_GLOBAL));
 	if (err)
-		goto err_fence;
+		goto err_rpm;
 
 	/* No more allocations allowed now we hold vm->mutex */
@@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	bound = atomic_read(&vma->flags);
-	if (unlikely(bound & I915_VMA_ERROR)) {
-		err = -ENOMEM;
-		goto err_unlock;
-	}
-
 	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
 		err = -EAGAIN; /* pins are meant to be fairly temporary */
 		goto err_unlock;
@@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj ? vma->obj->cache_level : 0,
-			    flags, work);
+			    flags, &stash);
 	if (err)
 		goto err_remove;
@@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	i915_active_release(&vma->active);
 err_unlock:
 	mutex_unlock(&vma->vm->mutex);
-err_fence:
-	if (work)
-		dma_fence_work_commit_imm(&work->base);
 err_rpm:
+	i915_vm_free_pt_stash(vma->vm, &stash);
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 	vma_put_pages(vma);
@@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
 		else
 			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-		if (err != -ENOSPC) {
-			if (!err) {
-				err = i915_vma_wait_for_bind(vma);
-				if (err)
-					i915_vma_unpin(vma);
-			}
-
+		if (err != -ENOSPC)
 			return err;
-		}
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
@@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma->vm, vma);
 	}
-	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
 	i915_vma_detach(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8df784a026d2..d1d0fc76ef99 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-struct i915_vma_work *i915_vma_work(void);
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work);
+		  struct i915_vm_pt_stash *stash);
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
-int i915_vma_wait_for_bind(struct i915_vma *vma);
-
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 6b1bfa230b82..82868a755a96 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -240,9 +240,6 @@ struct i915_vma {
 #define I915_VMA_ALLOC_BIT	12
 
-#define I915_VMA_ERROR_BIT	13
-#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
-
 #define I915_VMA_GGTT_BIT	14
 #define I915_VMA_CAN_FENCE_BIT	15
 #define I915_VMA_USERFAULT_BIT	16
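For reference, the resulting pin path in i915_vma_pin_ww() now reads roughly like this (condensed from the hunks above; the ww/runtime-pm plumbing and error labels are elided, so treat it as a sketch, not the literal code):

	struct i915_vm_pt_stash stash = {};

	/* Allocate and pin the page directories up front, outside
	 * vm->mutex, so nothing allocates once the mutex is held. */
	if (vma->vm->allocate_va_range) {
		err = i915_vm_alloc_pt_stash(vma->vm, &stash, vma->size);
		if (!err)
			err = i915_vm_lock_objects(vma->vm, ww);
		if (!err)
			err = i915_vm_pin_pt_stash(vma->vm, &stash);
	}

	/* PTEs are written synchronously; no dma_fence_work involved. */
	if (!err)
		err = mutex_lock_interruptible_nested(&vma->vm->mutex,
						      !(flags & PIN_GLOBAL));
	if (!err) {
		err = i915_vma_bind(vma,
				    vma->obj ? vma->obj->cache_level : 0,
				    flags, &stash);
		mutex_unlock(&vma->vm->mutex);
	}

	i915_vm_free_pt_stash(vma->vm, &stash);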
-- 
2.31.0