The only real reason for this is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme. All other callers are from process context, and generally from fairly benign locking contexts.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow up in the current hardirq calling context, so we need a stop-gap measure.
Put in a FIXME comment noting when this should be removable again.
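For reference, the general shape of the workaround, as a minimal sketch with a made-up struct (the actual i915 version is in the diff below): the kref release callback has to stay hardirq-safe, so it only schedules a worker, and the worker then does the cleanup that needs process context:

	#include <linux/kref.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct foo {
		struct kref ref;
		struct work_struct release_work;
	};

	static void foo_release_work(struct work_struct *work)
	{
		struct foo *foo = container_of(work, struct foo, release_work);

		/* cleanup that may sleep or take sleeping locks goes here */
		kfree(foo);
	}

	static void foo_release(struct kref *ref)
	{
		struct foo *foo = container_of(ref, struct foo, ref);

		/* hardirq-safe: just punt the real cleanup to process context */
		schedule_work(&foo->release_work);
	}

	static void foo_put(struct foo *foo)
	{
		kref_put(&foo->ref, foo_release);
	}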
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
gem context refcounting is another exercise in least-locking design, it seems: most things get destroyed upon context closure (which can race with anything, really), and only the actual memory allocation and the locks survive while holding a reference.
This tripped up Jason when reimplementing the single timeline feature in
commit 00dae4d3d35d4f526929633b76e00b0ab4d3970d Author: Jason Ekstrand jason@jlekstrand.net Date: Thu Jul 8 10:48:12 2021 -0500
drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)
We could fix the bug by holding ctx->mutex in execbuf and clearing the pointer (again while holding the mutex) in context_close, but it's cleaner to just make the context object actually invariant over its _entire_ lifetime. This way any other ioctl that's potentially racing, but holding a full reference, can still rely on ctx->syncobj being an immutable pointer, which without this change is not the case.
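The payoff, as a minimal sketch (hypothetical caller, not part of this patch): any path that holds a full context reference can now touch ctx->syncobj without taking ctx->mutex, because the pointer lives exactly as long as the context itself:

	/* caller holds a full reference on ctx, e.g. from an ioctl lookup */
	if (ctx->syncobj)
		drm_syncobj_get(ctx->syncobj);	/* pointer is invariant, no ctx->mutex needed */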
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: Jason Ekstrand jason@jlekstrand.net Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Matthew Brost matthew.brost@intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: "Thomas Hellström" thomas.hellstrom@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 051bc357ff65..5a053cf14948 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -994,6 +994,9 @@ static void i915_gem_context_release_work(struct work_struct *work) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+ if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex);
@@ -1220,9 +1223,6 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm);
- if (ctx->syncobj) - drm_syncobj_put(ctx->syncobj); - ctx->file_priv = ERR_PTR(-EBADF);
/*
The comment added in
commit b81dde719439c8f09bb61e742ed95bfc4b33946b Author: Chris Wilson chris@chris-wilson.co.uk Date: Tue May 21 22:11:29 2019 +0100
drm/i915: Allow userspace to clone contexts on creation
and moved in
commit 27dbae8f36c1c25008b7885fc07c57054b7dfba3 Author: Chris Wilson chris@chris-wilson.co.uk Date: Wed Nov 6 09:13:12 2019 +0000
drm/i915/gem: Safely acquire the ctx->vm when copying
suggested that i915_address_space was at least intended to be managed through SLAB_TYPESAFE_BY_RCU:
* This ppgtt may have be reallocated between * the read and the kref, and reassigned to a third * context. In order to avoid inadvertent sharing * of this ppgtt with that third context (and not * src), we have to confirm that we have the same * ppgtt after passing through the strong memory * barrier implied by a successful * kref_get_unless_zero().
But an extensive git history search has not brought any such reuse to light.
What has come to light though is that ever since
commit 2850748ef8763ab46958e43a4d1c445f29eeb37d Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Oct 4 14:39:58 2019 +0100
drm/i915: Pull i915_vma_pin under the vm->mutex
(yes this commit is earlier) the final i915_vm_put call has been moved from i915_gem_context_free (now called _release) to context_close, which means it's not actually safe anymore to access the ctx->vm pointer without locks held, because it might disappear at any moment. Note that superficially things all still work, because the i915_address_space is RCU protected since
commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 20 19:37:05 2019 +0100
drm/i915/gtt: Defer address space cleanup to an RCU worker
except the very clever retry loop the comment above sits in (context_get_vm_rcu(), which is designed to protect against object reuse due to SLAB_TYPESAFE_BY_RCU or similar tricks) results in an endless loop if the refcount of ctx->vm ever permanently drops to 0. Which it now totally can.
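Paraphrasing that retry loop (it is quoted in full where a later patch in this series deletes it):

	do {
		vm = rcu_dereference(ctx->vm);
		if (!kref_get_unless_zero(&vm->ref))
			continue;	/* spins forever once the refcount can no longer come back from 0 */

		if (vm == rcu_access_pointer(ctx->vm))
			return rcu_pointer_handoff(vm);

		i915_vm_put(vm);
	} while (1);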
Fix that by moving the final i915_vm_put to where it should be.
Note that i915_gem_context is rcu protected, but _only_ for the final kfree. This means anyone who chases a pointer to a gem ctx solely under rcu protection can pretty much only call kref_get_unless_zero(). This seems to be pretty much the case, aside from a bunch of cases that consult the scheduling information without any further protection.
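In other words the only safe pattern for such a chaser is roughly the following (a sketch assuming an xarray-based lookup like the one i915_gem_context_lookup() does; the xarray name is illustrative):

	rcu_read_lock();
	ctx = xa_load(&file_priv->context_xa, id);
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;	/* already on its way out, only the kfree is still pending */
	rcu_read_unlock();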
Cc: Jason Ekstrand jason@jlekstrand.net Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Matthew Brost matthew.brost@intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: "Thomas Hellström" thomas.hellstrom@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5a053cf14948..12e2de1db1a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -990,6 +990,7 @@ static void i915_gem_context_release_work(struct work_struct *work) { struct i915_gem_context *ctx = container_of(work, typeof(*ctx), release_work); + struct i915_address_space *vm;
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -997,6 +998,10 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj);
+ vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_put(vm); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex);
@@ -1220,8 +1225,15 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx);
vm = i915_gem_context_vm(ctx); - if (vm) + if (vm) { + /* i915_vm_close drops the final reference, which is a bit too + * early and could result in surprises with concurrent + * operations racing with this ctx close. Keep a full reference + * until the end. + */ + i915_vm_get(vm); i915_vm_close(vm); + }
ctx->file_priv = ERR_PTR(-EBADF);
Changing the vm of a finalized gem ctx is no longer possible, which means we don't have to check for that anymore.
I was pondering whether to keep the check as a WARN_ON, but things go boom real bad real fast if the vm of a vma is wrong. Plus we'd need to also get the ggtt vm for !full-ppgtt platforms. Ditching it all seemed like a better idea.
References: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e809aca00f72..905b1cbd22d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -775,11 +775,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb, /* Check that the context hasn't been closed in the meantime */ err = -EINTR; if (!mutex_lock_interruptible(&ctx->lut_mutex)) { - struct i915_address_space *vm = rcu_access_pointer(ctx->vm); - - if (unlikely(vm && vma->vm != vm)) - err = -EAGAIN; /* user racing with ctx set-vm */ - else if (likely(!i915_gem_context_is_closed(ctx))) + if (likely(!i915_gem_context_is_closed(ctx))) err = radix_tree_insert(&ctx->handles_vma, handle, vma); else err = -ENOENT;
The important part isn't so much that this does an rcu lookup - that's more an implementation detail, which will also be removed.
The thing that makes this different from other functions is that it's getting you the vm that batchbuffers will run in for that gem context, which is either a full ppgtt stored in the gem ctx, or the ggtt.
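Roughly, by the end of this series the helper boils down to (see the final patch that drops the rcu dance):

	static inline struct i915_address_space *
	i915_gem_context_get_eb_vm(struct i915_gem_context *ctx)
	{
		struct i915_address_space *vm;

		vm = ctx->vm;			/* the full ppgtt, if the context has one */
		if (!vm)
			vm = &ctx->i915->ggtt.vm;	/* otherwise batches run in the ggtt */

		return i915_vm_get(vm);
	}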
We'll make more use of this function later on.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 4 ++-- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18060536b0c2..da6e8b506d96 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -155,7 +155,7 @@ i915_gem_context_vm(struct i915_gem_context *ctx) }
static inline struct i915_address_space * -i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a094f3ce1a90..6c68fe26bb32 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1456,7 +1456,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1512,7 +1512,7 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eb5050f8cb3..d436ce7fa25c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1528,7 +1528,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
intel_gt_chipset_flush(engine->gt);
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1607,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600;
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..b3863abc51f5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (batch) { struct i915_address_space *vm;
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(batch, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 08f011f893b2..6023c418ee8a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -117,7 +117,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f843a5040706..2d60a5a5b065 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1300,7 +1300,7 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM;
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); err = func(vm, 0, min(vm->total, limit), end_time); i915_vm_put(vm);
@@ -1848,7 +1848,7 @@ static int igt_cs_tlb(void *arg) goto out_unlock; }
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); if (i915_is_ggtt(vm)) goto out_vm;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index dd0607254a95..79ba72da0813 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -118,7 +118,7 @@ static int create_vmas(struct drm_i915_private *i915, struct i915_vma *vma; int err;
- vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = checked_vma_instance(obj, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma))
Consolidates the "which is the vm my execbuf runs in" code a bit. We do some get/put which isn't really required, but all the other users want the refcounting, and I figured adding a function just for this getparam to avoid 2 atomics is a bit much.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 12e2de1db1a2..7a566fb7cca4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2108,6 +2108,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; struct i915_gem_context *ctx; + struct i915_address_space *vm; int ret = 0;
ctx = i915_gem_context_lookup(file_priv, args->ctx_id); @@ -2117,12 +2118,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - rcu_read_lock(); - if (rcu_access_pointer(ctx->vm)) - args->value = rcu_dereference(ctx->vm)->total; - else - args->value = to_i915(dev)->ggtt.vm.total; - rcu_read_unlock(); + vm = i915_gem_context_get_eb_vm(ctx); + args->value = vm->total; + i915_vm_put(vm); + break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
And use it anywhere we have open-coded checks for ctx->vm that really only check for full ppgtt.
Plus for paranoia add a GEM_BUG_ON that checks it's really only set when we have full ppgtt, just in case. gem_context->vm is different since it's NULL in ggtt mode, unlike intel_context->vm or gt->vm, which are always set.
v2: 0day found a testcase that I missed.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.h | 7 +++++++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +++--- 4 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7a566fb7cca4..1eec85944c1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1566,7 +1566,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, int err; u32 id;
- if (!rcu_access_pointer(ctx->vm)) + if (!i915_gem_context_is_full_ppgtt(ctx)) return -ENODEV;
rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index da6e8b506d96..37536a260e6e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -154,6 +154,13 @@ i915_gem_context_vm(struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); }
+static inline bool i915_gem_context_is_full_ppgtt(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + + return !!rcu_access_pointer(ctx->vm); +} + static inline struct i915_address_space * i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 905b1cbd22d5..40f08948f0b2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -749,7 +749,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return PTR_ERR(ctx);
eb->gem_context = ctx; - if (rcu_access_pointer(ctx->vm)) + if (i915_gem_context_is_full_ppgtt(ctx)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d436ce7fa25c..0708b9cdeb9f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -704,7 +704,7 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_is_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -838,7 +838,7 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_is_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1417,7 +1417,7 @@ static int igt_ctx_readonly(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), ce->engine->name, - yesno(!!ctx_vm(ctx)), + yesno(i915_gem_context_is_full_ppgtt(ctx)), err); i915_gem_context_unlock_engines(ctx); goto out_file;
Since
commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand jason@jlekstrand.net Date: Thu Jul 8 10:48:30 2021 -0500
drm/i915/gem: Don't allow changing the VM on running contexts (v4)
the gem_ctx->vm can't change anymore. Plus we always set the intel_context->vm, so might as well use the helper we have for that.
This makes it very clear that we always overwrite intel_context->vm for userspace contexts, since the default is gt->vm, which is explicitly reserved for kernel context use. It would be good to split things up a bit further and avoid any possibility of an accident where we run kernel stuff in a userspace vm or the other way round.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1eec85944c1f..18e23d9220ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -791,16 +791,8 @@ static int intel_context_set_gem(struct intel_context *ce,
ce->ring_size = SZ_16K;
- if (rcu_access_pointer(ctx->vm)) { - struct i915_address_space *vm; - - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); /* hmm */ - rcu_read_unlock(); - - i915_vm_put(ce->vm); - ce->vm = vm; - } + i915_vm_put(ce->vm); + ce->vm = i915_gem_context_get_eb_vm(ctx);
if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine) &&
It's been invariant since
commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand jason@jlekstrand.net Date: Thu Jul 8 10:48:30 2021 -0500
drm/i915/gem: Don't allow changing the VM on running contexts (v4)
this just completes the deed. I've tried to split out the prep work for more careful review as much as possible; this is what's left:
- get_ppgtt gets simplified since we don't need to grab a temporary reference - we can rely on the reference the caller holds on the gem_ctx while we inspect the vm. The new vm_id still needs a full i915_vm_open ofc. This also removes the final caller of context_get_vm_rcu.
- A pile of selftests can now just look at ctx->vm instead of rcu_dereference_protected(ctx->vm, true) or similar things.
- All callers of i915_gem_context_vm also disappear.
- I've changed the hugepage selftest to set scrub_64K without any locking, because when we inspect that setting we're not taking any locks either. It works because it's a selftest that's careful (single threaded gives you nice ordering) and not a live driver where races can happen from anywhere.
These can only be split up further if we have some intermediate state with a bunch more rcu_dereference_protected(ctx->vm, true), just to shut up lockdep and sparse.
The conversion to __rcu happened in
commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Oct 4 14:40:09 2019 +0100
drm/i915: Move context management under GEM
Note that we're not breaking the actual bugfix in there: the real bugfix is pushing the i915_vm_release onto a separate worker, to avoid locking inversion issues. The rcu conversion was just thrown in for entertainment value on top (no, vm lookup isn't even close to anything that's a hotpath where removing the single spinlock could be measured).
v2: Rebase over the change to move the i915_vm_put() into i915_gem_context_release().
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 55 ++----------------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 14 +---- .../gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 4 +- .../drm/i915/gem/selftests/i915_gem_context.c | 24 +++----- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 22 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 18e23d9220ae..fe8cd5456438 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -742,44 +742,6 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, return ret; }
-static struct i915_address_space * -context_get_vm_rcu(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - - do { - struct i915_address_space *vm; - - /* - * We do not allow downgrading from full-ppgtt [to a shared - * global gtt], so ctx->vm cannot become NULL. - */ - vm = rcu_dereference(ctx->vm); - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of ctx, - * we no longer care if it is released from ctx, as - * it cannot be reallocated elsewhere. - */ - - if (vm == rcu_access_pointer(ctx->vm)) - return rcu_pointer_handoff(vm); - - i915_vm_put(vm); - } while (1); -} - static int intel_context_set_gem(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_sseu sseu) @@ -990,7 +952,7 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj);
- vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) i915_vm_put(vm);
@@ -1216,7 +1178,7 @@ static void context_close(struct i915_gem_context *ctx)
set_closed_name(ctx);
- vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) { /* i915_vm_close drops the final reference, which is a bit too * early and could result in surprises with concurrent @@ -1335,7 +1297,7 @@ i915_gem_create_context(struct drm_i915_private *i915, vm = &ppgtt->vm; } if (vm) { - RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); + ctx->vm = i915_vm_open(vm);
/* i915_vm_open() takes a reference */ i915_vm_put(vm); @@ -1561,15 +1523,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (!i915_gem_context_is_full_ppgtt(ctx)) return -ENODEV;
- rcu_read_lock(); - vm = context_get_vm_rcu(ctx); - rcu_read_unlock(); - if (!vm) - return -ENODEV; + vm = ctx->vm; + GEM_BUG_ON(!vm);
err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL); if (err) - goto err_put; + return err;
i915_vm_open(vm);
@@ -1577,8 +1536,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, args->value = id; args->size = 0;
-err_put: - i915_vm_put(vm); return err; }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 37536a260e6e..7696bc91647d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -148,17 +148,11 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); }
-static inline struct i915_address_space * -i915_gem_context_vm(struct i915_gem_context *ctx) -{ - return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); -} - static inline bool i915_gem_context_is_full_ppgtt(struct i915_gem_context *ctx) { - GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + GEM_BUG_ON(!!ctx->vm != HAS_FULL_PPGTT(ctx->i915));
- return !!rcu_access_pointer(ctx->vm); + return !!ctx->vm; }
static inline struct i915_address_space * @@ -166,12 +160,10 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm;
- rcu_read_lock(); - vm = rcu_dereference(ctx->vm); + vm = ctx->vm; if (!vm) vm = &ctx->i915->ggtt.vm; vm = i915_vm_get(vm); - rcu_read_unlock();
return vm; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 0c38789bd4a8..c4617e4d9fa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -262,7 +262,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space __rcu *vm; + struct i915_address_space *vm;
/** * @pid: process id of creator diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 6c68fe26bb32..5d71626a1ee5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1688,11 +1688,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_file; }
- mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) WRITE_ONCE(vm->scrub_64K, true); - mutex_unlock(&ctx->mutex);
err = i915_subtests(tests, ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 0708b9cdeb9f..4370a90d8a50 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -27,12 +27,6 @@
#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
-static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) -{ - /* single threaded, private ctx */ - return rcu_dereference_protected(ctx->vm, true); -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -813,7 +807,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce;
- ctx = kernel_context(i915, ctx_vm(parent)); + ctx = kernel_context(i915, parent->vm); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; @@ -823,7 +817,7 @@ static int igt_shared_ctx_exec(void *arg) GEM_BUG_ON(IS_ERR(ce));
if (!obj) { - obj = create_test_object(ctx_vm(parent), + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1380,7 +1374,7 @@ static int igt_ctx_readonly(void *arg) goto out_file; }
- vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_file; @@ -1499,7 +1493,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
- err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err;
@@ -1596,7 +1590,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
- err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err;
@@ -1739,7 +1733,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) u32 *vaddr; int err = 0;
- vm = ctx_vm(ctx); + vm = ctx->vm; if (!vm) return -ENODEV;
@@ -1801,7 +1795,7 @@ static int igt_vm_isolation(void *arg) }
/* We can only test vm isolation, if the vm are distinct */ - if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + if (ctx_a->vm == ctx_b->vm) goto out_file;
/* Read the initial state of the scratch page */ @@ -1813,8 +1807,8 @@ static int igt_vm_isolation(void *arg) if (err) goto out_file;
- vm_total = ctx_vm(ctx_a)->total; - GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); + vm_total = ctx_a->vm->total; + GEM_BUG_ON(ctx_b->vm->total != vm_total);
count = 0; num_engines = 0; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 806ad688274b..237e5061381b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -1246,7 +1246,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = rcu_access_pointer(ctx->vm); + __entry->vm = ctx->vm; ),
TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 79ba72da0813..1f10fe36619b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -39,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true;
- if (vma->vm != rcu_access_pointer(ctx->vm)) { + if (vma->vm != ctx->vm) { pr_err("VMA created with wrong VM\n"); ok = false; }
We don't need the absolute speed of rcu for this. And i915_address_space in general doesn't need rcu protection anywhere else, after we've made gem contexts and engines a lot more immutable.
Note that this semantically reverts
commit aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Aug 30 19:03:25 2019 +0100
drm/i915: Use RCU for unlocked vm_idr lookup
except we have the conversion from idr to xarray in between.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 005b1cec7007..e37fac8fac0c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1881,11 +1881,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm;
- rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); if (vm && !kref_get_unless_zero(&vm->ref)) vm = NULL; - rcu_read_unlock(); + xa_unlock(&file_priv->vm_xa);
return vm; }
On 13-08-2021 22:30, Daniel Vetter wrote:
We don't need the absolute speed of rcu for this. And i915_address_space in general doesn't need rcu protection anywhere else, after we've made gem contexts and engines a lot more immutable.
Note that this semantically reverts
commit aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Aug 30 19:03:25 2019 +0100
drm/i915: Use RCU for unlocked vm_idr lookup
except we have the conversion from idr to xarray in between.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 005b1cec7007..e37fac8fac0c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1881,11 +1881,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm;
- rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); if (vm && !kref_get_unless_zero(&vm->ref)) vm = NULL;
I think this could be a plain i915_vm_get now, kref_get_unless_zero is not guarded by RCU any more.
- rcu_read_unlock(); + xa_unlock(&file_priv->vm_xa);
return vm;
}
Apart from that, all looks good.
With this fix, for patches 2-11:
Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com
We don't need the absolute speed of rcu for this. And i915_address_space in general doesn't need rcu protection anywhere else, after we've made gem contexts and engines a lot more immutable.
Note that this semantically reverts
commit aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Aug 30 19:03:25 2019 +0100
drm/i915: Use RCU for unlocked vm_idr lookup
except we have the conversion from idr to xarray in between.
v2: kref_get_unless_zero is no longer required (Maarten)
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index be2392bbcecc..d89ff55d8fc8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1874,11 +1874,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm;
- rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + if (vm) + kref_get(&vm->ref); + xa_unlock(&file_priv->vm_xa);
return vm; }
The full audit is quite a bit of work:
- i915_dpt has a very simple lifetime (somehow we create a display pagetable vm per object, so it's _very_ simple, there's only ever a single vma in there), and uses i915_vm_close(), which internally does an i915_vm_put(). No rcu.
Aside: wtf is i915_dpt, a new feature, doing in the intel_display.c garbage collector, instead of being added as a separate file with some clean-ish interface?
Also, i915_dpt unfortunately re-introduces some coding patterns from pre-dma_resv_lock conversion times.
- i915_gem_proto_ctx is fully refcounted and no rcu, all protected by fpriv->proto_context_lock.
- i915_gem_context is itself rcu protected, and that might leak to anything it points at. Before
commit cf977e18610e66e48c31619e7e0cfa871be9eada Author: Chris Wilson chris@chris-wilson.co.uk Date: Wed Dec 2 11:21:40 2020 +0000
drm/i915/gem: Spring clean debugfs
and
commit db80a1294c231b6ac725085f046bb2931e00c9db Author: Chris Wilson chris@chris-wilson.co.uk Date: Mon Jan 18 11:08:54 2021 +0000
drm/i915/gem: Remove per-client stats from debugfs/i915_gem_objects
we had a bunch of debugfs files that relied on rcu protecting everything, but those are gone now. The main one was removed even earlier with
There doesn't seem to be anything left that's actually protecting stuff now that the ctx->vm itself is invariant. See
commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand jason@jlekstrand.net Date: Thu Jul 8 10:48:30 2021 -0500
drm/i915/gem: Don't allow changing the VM on running contexts (v4)
Note that we drop the vm refcount before the final release of the gem context refcount, so this is all very dangerous even without rcu. Note that aside from later on creating new engines (a defunct feature) and debug output we never look at gem_ctx->vm for anything functional, which is why this is ok. Fingers crossed.
Preceding patches removed all vestiges of rcu use from gem_ctx->vm dereferencing to make it clear it's really not used.
The gem_ctx->rcu protection was introduced in
commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Oct 4 14:40:09 2019 +0100
drm/i915: Move context management under GEM
The commit message is somewhat entertaining because it fails to mention this fact completely, and compensates for that with an in-commit changelog entry that claims ctx->vm is protected by ctx->mutex. That was the case _before_ this commit, but no longer after it.
- intel_context holds a full reference. Unfortunately intel_context is also rcu protected and the reference to the ->vm is dropped before the rcu barrier - only the kfree is delayed. So again we need to check whether that leaks anywhere on the intel_context->vm. RCU is only used to protect intel_context sitting on the breadcrumb lists, which don't look at the vm anywhere, so we are fine.
Nothing else relies on rcu protection of intel_context and hence is fully protected by the kref refcount alone, which protects intel_context->vm in turn.
The breadcrumbs rcu usage was added in
commit c744d50363b714783bbc88d986cc16def13710f7 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Nov 26 14:04:06 2020 +0000
drm/i915/gt: Split the breadcrumb spinlock between global and contexts
its parent commit added the intel_context rcu protection:
commit 14d1eaf08845c534963c83f754afe0cb14cb2512 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Nov 26 14:04:05 2020 +0000
drm/i915/gt: Protect context lifetime with RCU
giving some credence to my claim that I've actually caught them all.
- drm_i915_gem_object's shares_resv_from pointer has a full refcount to the dma_resv, which is a sub-refcount that's released after the final i915_vm_put() has been called. Safe.
Aside: Maybe we should have a struct dma_resv_shared which is just dma_resv + kref as a stand-alone thing. It's a pretty useful pattern which other drivers might want to copy.
For a bit more context see
commit 4d8151ae5329cf50781a02fd2298a909589a5bab Author: Thomas Hellström thomas.hellstrom@linux.intel.com Date: Tue Jun 1 09:46:41 2021 +0200
drm/i915: Don't free shared locks while shared
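As a sketch of the dma_resv_shared aside above (purely hypothetical, no such structure exists today):

	struct dma_resv_shared {
		struct kref ref;	/* lifetime of the shared lock itself */
		struct dma_resv resv;
	};

	static inline struct dma_resv_shared *
	dma_resv_shared_get(struct dma_resv_shared *shared)
	{
		kref_get(&shared->ref);
		return shared;
	}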
- the fpriv->vm_xa was relying on rcu_read_lock for the lookup, but that was also updated in a prep patch to just be a spinlock-protected lookup.
- intel_gt->vm is set at driver load in intel_gt_init() and released in intel_gt_driver_release(). There seems to be an issue where in some error paths this is called twice, but otherwise there's no rcu to be found anywhere. This was added in the commit below, which unfortunately doesn't explain why this complication exists.
commit e6ba76480299a0d77c51d846f7467b1673aad25b Author: Chris Wilson chris@chris-wilson.co.uk Date: Sat Dec 21 16:03:24 2019 +0000
drm/i915: Remove i915->kernel_context
The proper fix for this is most likely to start using drmm_ at a large scale, but that's also a huge amount of work.
- i915_vma->vm is some real pain, because i915_vma is rcu protected, at least for the vma lookup in the context lookup cache in eb_lookup_vma(). This was added in
commit 4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 Author: Chris Wilson chris@chris-wilson.co.uk Date: Fri Jun 16 15:05:16 2017 +0100
drm/i915: Store a direct lookup from object handle to vma
This was changed from a hashtable to a radix tree, with the locking unchanged, in
commit d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc Author: Chris Wilson chris@chris-wilson.co.uk Date: Wed Aug 16 09:52:08 2017 +0100
drm/i915: Replace execbuf vma ht with an idr
In
commit 93159e12353c2a47e5576d642845a91fa00530bf Author: Chris Wilson chris@chris-wilson.co.uk Date: Mon Mar 23 09:28:41 2020 +0000
drm/i915/gem: Avoid gem_context->mutex for simple vma lookup
the locking was changed from dev->struct_mutex to rcu, which added the requirement to rcu protect i915_vma. Somehow this was missed in review (or I'm completely blind).
Irrespective of all that, the vma lookup cache grabs a full reference to the vma under rcu_read_lock, and the rcu doesn't leak any further. So no impact on i915_address_space from that.
I have not found any other rcu use for i915_vma, but given that it seems broken I also didn't bother to do a careful in-depth audit.
Altogether there's nothing left in-tree anymore which requires that a pointer deref to an i915_address_space is safe under rcu_read_lock only.
rcu protection of i915_address_space was introduced in
commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 20 19:37:05 2019 +0100
drm/i915/gtt: Defer address space cleanup to an RCU worker
by mixing up a bugfix (i915_address_space needs to be released from a worker) with enabling rcu support. The commit message also seems somewhat confused, because it talks about cleanup of WC pages requiring sleep, while the code and linked bugzilla are about a requirement to take dev->struct_mutex (which yes sleeps, but is a much more specific problem). Since the final kref_put can be called from pretty much anywhere (including hardirq context through the scheduler's i915_active cleanup) we need a worker here. Hence that part must be kept.
Ideally all these reclaim workers should have some kind of integration with our shrinkers, but for some of these it's rather tricky. Anyway, that's a preexisting condition in the codebase that we won't fix in this patch here.
We also remove the rcu_barrier in ggtt_cleanup_hw added in
commit 60a4233a4952729089e4df152e730f8f4d0e82ce Author: Chris Wilson chris@chris-wilson.co.uk Date: Mon Jul 29 14:24:12 2019 +0100
drm/i915: Flush the i915_vm_release before ggtt shutdown
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 - drivers/gpu/drm/i915/gt/intel_gtt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..8d71f67926f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, 0);
- rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ flush_workqueue(ggtt->vm.i915->wq);
mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..a0c2b952aa57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -155,7 +155,7 @@ void i915_vm_resv_release(struct kref *kref) static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); + container_of(work, struct i915_address_space, release_work);
vm->cleanup(vm); i915_address_space_fini(vm); @@ -171,7 +171,7 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm);
- queue_rcu_work(vm->i915->wq, &vm->rcu); + queue_work(vm->i915->wq, &vm->release_work); }
void i915_address_space_init(struct i915_address_space *vm, int subclass) @@ -185,7 +185,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref);
- INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1);
/* diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc7153018ebd..5b539bd7645d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -213,7 +213,7 @@ struct i915_vma_ops {
struct i915_address_space { struct kref ref; - struct rcu_work rcu; + struct work_struct release_work;
struct drm_mm mm; struct intel_gt *gt;
The only real reason for this is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme. All other callers are from process context, and generally from fairly benign locking contexts.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow up in the current hardirq calling context, so we need a stop-gap measure.
Put in a FIXME comment noting when this should be removable again.
v2: Fix mock_context(), noticed by intel-gfx-ci.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
mutex_init(&ctx->mutex);
On 14-08-2021 12:43, Daniel Vetter wrote:
The only real reason for this is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme. All other callers are from process context, and generally from fairly benign locking contexts.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow up in the current hardirq calling context, so we need a stop-gap measure.
Put in a FIXME comment noting when this should be removable again.
v2: Fix mock_context(), noticed by intel-gfx-ci.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915;
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
mutex_init(&ctx->mutex);
Is the workqueue really needed? I don't think you can still race in drm_syncobj_free() once the refcount is zero, so in that case removing the locking from _release would work just as well as a workqueue.
Something like the below would keep drm_syncobj_put() hardirq safe.
I assume the cb list is supposed to be empty when freeing, so I added a WARN_ON just to be sure; otherwise we should just tear down the list without locking too.
This should be a better alternative for patch 1.
----8<-------
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c9a9d74f338c..9d561decd97e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -462,7 +462,13 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount); - drm_syncobj_replace_fence(syncobj, NULL); + struct dma_fence *old_fence; + + old_fence = rcu_dereference_protected(syncobj->fence, !kref_read(&syncobj->refcount)); + dma_fence_put(old_fence); + + WARN_ON(!list_empty(&syncobj->cb_list)); + kfree(syncobj); } EXPORT_SYMBOL(drm_syncobj_free);
On Tue, Aug 31, 2021 at 11:38:27AM +0200, Maarten Lankhorst wrote:
On 14-08-2021 at 12:43, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
Put a FIXME comment in when this should be removable again.
v2: Fix mock_context(), noticed by intel-gfx-ci.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915;
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
mutex_init(&ctx->mutex);
Is the workqueue really needed? I'm not sure you could still race in drm_syncobj_free when refcount is zero, so in that case removing locking from _release would work as well as a workqueue.
Something like below would keep the drm_sync_obj_put hardirq safe.
I assume when freeing, the cb list is supposed to be empty, so I added a WARN_ON just to be sure, otherwise we should just tear down the list without locking too.
This should be a better alternative for patch 1.
This isn't enough, because the problem isn't just the syncobj put. It's also the i915_vm_put, and if we de-RCU-ify the intel_context stuff too, then there will be more intel_context_put on top.
So we really need the worker here I think. Trying to make every _unpin() and _put() work from hardirq context with clever locking tricks is why the current code is so incomprehensible.
Also vms are rare enough that we really don't care about some overhead/delay here. -Daniel
----8<------- diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c9a9d74f338c..9d561decd97e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -462,7 +462,13 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount);
- drm_syncobj_replace_fence(syncobj, NULL);
+ struct dma_fence *old_fence; + + old_fence = rcu_dereference_protected(syncobj->fence, !kref_read(&syncobj->refcount)); + dma_fence_put(old_fence); + + WARN_ON(!list_empty(&syncobj->cb_list)); + kfree(syncobj);
} EXPORT_SYMBOL(drm_syncobj_free);
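To make the point about calling context concrete, here is a hedged sketch of the kind of fence callback Daniel describes (all demo_* names are invented; this shows the general shape of a callback that owns a reference, not how i915's engines_notify() is literally written, and it reuses the demo_ctx release worker from the sketch earlier in the thread). dma_fence callbacks run in whatever context signals the fence, often hardirq or tasklet, so if such a callback drops the last reference, everything reachable from the release path has to be atomic-safe unless the release is deferred to a worker as in the patch.

#include <linux/dma-fence.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_ctx {
        struct kref ref;
        struct work_struct release_work;        /* set up with INIT_WORK() as in the earlier sketch */
};

/* Couples the fence callback with the context reference it holds. */
struct demo_notify {
        struct dma_fence_cb cb;
        struct demo_ctx *ctx;
};

static void demo_ctx_release(struct kref *ref)
{
        struct demo_ctx *ctx = container_of(ref, typeof(*ctx), ref);

        /* Only queue work here; the sleeping teardown runs from the worker. */
        queue_work(system_wq, &ctx->release_work);
}

/* May run in hardirq context, whenever the fence gets signalled. */
static void demo_notify_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct demo_notify *n = container_of(cb, struct demo_notify, cb);

        /*
         * Possibly the last reference, possibly in atomic context: with the
         * deferred release this stays safe no matter how many vm/engine puts
         * hang off the final cleanup.
         */
        kref_put(&n->ctx->ref, demo_ctx_release);
        kfree(n);       /* kfree() is safe from atomic context */
}

static int demo_watch_fence(struct demo_ctx *ctx, struct dma_fence *fence)
{
        struct demo_notify *n = kmalloc(sizeof(*n), GFP_KERNEL);
        int err;

        if (!n)
                return -ENOMEM;

        kref_get(&ctx->ref);    /* the callback owns a context reference */
        n->ctx = ctx;

        err = dma_fence_add_callback(fence, &n->cb, demo_notify_cb);
        if (err) {
                /* Fence already signalled: the callback will never run. */
                kref_put(&ctx->ref, demo_ctx_release);
                kfree(n);
        }
        return err;
}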
On Tue, Aug 31, 2021 at 02:16:56PM +0200, Daniel Vetter wrote:
On Tue, Aug 31, 2021 at 11:38:27AM +0200, Maarten Lankhorst wrote:
On 14-08-2021 at 12:43, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
Put a FIXME comment in when this should be removable again.
v2: Fix mock_context(), noticed by intel-gfx-ci.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915;
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
mutex_init(&ctx->mutex);
Is the workqueue really needed? I'm not sure you could still race in drm_syncobj_free when refcount is zero, so in that case removing locking from _release would work as well as a workqueue.
Something like below would keep the drm_sync_obj_put hardirq safe.
I assume when freeing, the cb list is supposed to be empty, so I added a WARN_ON just to be sure, otherwise we should just tear down the list without locking too.
This should be a better alternative for patch 1.
This isn't enough, because the problem isn't just the syncobj put. It's also the i915_vm_put, and if we dercuify the intel_context stuff too, then there will be more intel_context_put on top.
So we really need the worker here I think. Trying to make every _unpin() and _put() work from hardirq context with clever locking tricks is why the current code is so incomprehensible.
Also vms are rare enough that we really don't care about some overhead/delay here.
The other reason is the one I explained in the commit message: aside from the engines i915_active there's no reason why anyone should call i915_gem_context_put outside of process context. And I plan to fix that as the next step. Or at least I'll try to untangle the context/engine lifetime rules a bit. -Daniel
-Daniel
----8<------- diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c9a9d74f338c..9d561decd97e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -462,7 +462,13 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount);
- drm_syncobj_replace_fence(syncobj, NULL);
+ struct dma_fence *old_fence; + + old_fence = rcu_dereference_protected(syncobj->fence, !kref_read(&syncobj->refcount)); + dma_fence_put(old_fence); + + WARN_ON(!list_empty(&syncobj->cb_list)); + kfree(syncobj);
} EXPORT_SYMBOL(drm_syncobj_free);
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On 31-08-2021 at 17:14, Daniel Vetter wrote:
On Tue, Aug 31, 2021 at 02:16:56PM +0200, Daniel Vetter wrote:
On Tue, Aug 31, 2021 at 11:38:27AM +0200, Maarten Lankhorst wrote:
On 14-08-2021 at 12:43, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
Put a FIXME comment in when this should be removable again.
v2: Fix mock_context(), noticed by intel-gfx-ci.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915;
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
mutex_init(&ctx->mutex);
Is the workqueue really needed? I'm not sure you could still race in drm_syncobj_free when refcount is zero, so in that case removing locking from _release would work as well as a workqueue.
Something like below would keep the drm_sync_obj_put hardirq safe.
I assume when freeing, the cb list is supposed to be empty, so I added a WARN_ON just to be sure, otherwise we should just tear down the list without locking too.
This should be a better alternative for patch 1.
This isn't enough, because the problem isn't just the syncobj put. It's also the i915_vm_put, and if we dercuify the intel_context stuff too, then there will be more intel_context_put on top.
So we really need the worker here I think. Trying to make every _unpin() and _put() work from hardirq context with clever locking tricks is why the current code is so incomprehensible.
Also vms are rare enough that we really don't care about some overhead/delay here.
Other reason is the one I explained in the commit message: Aside from the engines i915_active there's no reason why anyone should call i915_gem_context_put outside of process context. And I plan to fix that as the next step. Or at least I'll try to untangle the context/engine lifetime rules a bit. -Daniel
That would definitely help me a lot too, so Acked-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com for patch 1.
On 13/08/2021 21:30, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
There is reset which definitely isn't process context.
Otherwise I did not really get from the commit message whether this patch is fixing an existing problem or preparing something for the future. If the former then, as I wrote above, I am pretty sure there are call sites from the tasklet already.
Regards,
Tvrtko
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
Put a FIXME comment in when this should be removable again.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
On Thu, Sep 2, 2021 at 2:42 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 13/08/2021 21:30, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
There is reset which definitely isn't process context.
gpu reset runs in process context. The tasklet context is the engines_notify I'm talking about above.
Otherwise I did not really get from the commit message is this patch fixing an existing problem or preparing something for the future. If the former then as I wrote above - I am pretty sure there are call sites from the tasklet already.
Regards,
Tvrtko
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
I guess this para wasn't clear, but subsequent patches fix the refcount bugs and need this prep patch here. -Daniel
Put a FIXME comment in when this should be removable again.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
On 02/09/2021 16:05, Daniel Vetter wrote:
On Thu, Sep 2, 2021 at 2:42 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 13/08/2021 21:30, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
There is reset which definitely isn't process context.
gpu reset runs in process context. The tasklet context is the engines_notify I'm talking about above.
I haven't looked very deeply but please double check the path from execlists_submission_tasklet -> execlists_reset -> intel_engine_reset -> __intel_engine_reset -> execlists_reset_rewind -> execlists_reset_csb -> execlists_reset_active -> __i915_request_reset -> mark_guilty -> i915_gem_context_put.
Otherwise I did not really get from the commit message is this patch fixing an existing problem or preparing something for the future. If the former then as I wrote above - I am pretty sure there are call sites from the tasklet already.
Regards,
Tvrtko
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
I guess this para wasn't clear, but subsequent patches fix the refcount bugs and need this prep patch here.
So up to where in the series are those fixes, and where does the other stuff follow? Worth splitting and having cover letters, perhaps? Is the fixing part applicable to the existing code, or does it only come into play with the syncobj single timeline changes?
Regards,
Tvrtko
-Daniel
Put a FIXME comment in when this should be removable again.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
On Thu, Sep 2, 2021 at 6:20 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 02/09/2021 16:05, Daniel Vetter wrote:
On Thu, Sep 2, 2021 at 2:42 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 13/08/2021 21:30, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
There is reset which definitely isn't process context.
gpu reset runs in process context. The tasklet context is the engines_notify I'm talking about above.
I haven't looked very deeply but please double check the path from execlists_submission_tasklet -> execlists_reset -> intel_engine_reset -> __intel_engine_reset -> execlists_reset_rewind -> execlists_reset_csb -> execlists_reset_active -> __i915_request_reset -> mark_guilty -> i915_gem_context_put.
Thanks for pointing this out, I'll add it to the commit message.
More stuff to fix, yay.
Otherwise I did not really get from the commit message is this patch fixing an existing problem or preparing something for the future. If the former then as I wrote above - I am pretty sure there are call sites from the tasklet already.
Regards,
Tvrtko
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
I guess this para wasn't clear, but subsequent patches fix the refcount bugs and need this prep patch here.
So up to where in the series are those fixes and where other stuff follows? Worth spliting and having cover letters perhaps? Is the fixing part applicable to the existing code or only comes to play with the syncobj single timeline changes?
There are Fixes: lines. One is the timeline syncobj, the other is 2 years old. -Daniel
Regards,
Tvrtko
-Daniel
Put a FIXME comment in when this should be removable again.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */
On 02/09/2021 21:02, Daniel Vetter wrote:
On Thu, Sep 2, 2021 at 6:20 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 02/09/2021 16:05, Daniel Vetter wrote:
On Thu, Sep 2, 2021 at 2:42 PM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 13/08/2021 21:30, Daniel Vetter wrote:
The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context.
There is reset which definitely isn't process context.
gpu reset runs in process context. The tasklet context is the engines_notify I'm talking about above.
I haven't looked very deeply but please double check the path from execlists_submission_tasklet -> execlists_reset -> intel_engine_reset -> __intel_engine_reset -> execlists_reset_rewind -> execlists_reset_csb -> execlists_reset_active -> __i915_request_reset -> mark_guilty -> i915_gem_context_put.
Thanks for pointing this out, I'll add it to the commit message.
More stuff to fix, yay.
Otherwise I did not really get from the commit message is this patch fixing an existing problem or preparing something for the future. If the former then as I wrote above - I am pretty sure there are call sites from the tasklet already.
Regards,
Tvrtko
Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure.
I guess this para wasn't clear, but subsequent patches fix the refcount bugs and need this prep patch here.
So up to where in the series are those fixes and where other stuff follows? Worth spliting and having cover letters perhaps? Is the fixing part applicable to the existing code or only comes to play with the syncobj single timeline changes?
There's Fixes: lines. One is timeline syncobj, the other is 2 years old.
So the first two patches are standalone and fix the immediate bug? Could you describe the composition and purpose of the series in a cover letter, so it's possible to get an overview of the chunk of work being tackled?
Regards,
Tvrtko
-Daniel
Regards,
Tvrtko
-Daniel
Put a FIXME comment in when this should be removable again.
Signed-off-by: Daniel Vetter daniel.vetter@intel.com Cc: Jon Bloomfield jon.bloomfield@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: "Thomas Hellström" thomas.hellstrom@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Lionel Landwerlin lionel.g.landwerlin@intel.com Cc: Dave Airlie airlied@redhat.com Cc: Jason Ekstrand jason@jlekstrand.net
drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, return err; }
-void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work);
trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); }
+void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref;
+ /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */