If GUP-ineligible pages are passed to a GEM userptr object, -EFAULT is returned only when the object is actually bound.
The xf86-video-intel userspace driver cannot differentiate this condition, and marks the GPU as wedged. This not only disables graphics acceleration but may also cripple other functions such as VT switch.
Solve this by "prefaulting" user pages on GEM object creation, testing whether all pages are eligible for get_user_pages() in the process. On failure, return -EFAULT so that userspace can fallback to software blitting.
This behavior can be enabled via a new modparam "gem_userptr_prefault", which is false by default.
Known use cases:
- As a debugging aid, invalid pointers and/or wrong pages passed to userptr could be caught much earlier. - Qubes OS R4.0 uses VM_PFNMAP pages from drivers/xen/privcmd.c, in order to map framebuffers from Xen guest to dom0. These pages are not GUP-able, but they cannot be exposed via DMA-BUF either. Previously this issue had gone somehow undetected, until some patch between v4.14 and v4.19 triggered it.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com Cc: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Cc: Jani Nikula jani.nikula@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Rodrigo Vivi rodrigo.vivi@intel.com Cc: David Airlie airlied@linux.ie Cc: Daniel Vetter daniel@ffwll.ch Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Matthew Auld matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 35 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_params.c | 3 ++ drivers/gpu/drm/i915/i915_params.h | 1 + 3 files changed, 39 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index f2eaed6aca3d..5d653df2f759 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -712,6 +712,33 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .release = i915_gem_userptr_release, };
+static int i915_gem_userptr_prefault(unsigned long start, + unsigned long nr_pages, + bool readonly) +{ + unsigned int gup_flags = (readonly ? 0 : FOLL_WRITE) | FOLL_NOWAIT; + int err = 0; + + mmap_read_lock(current->mm); + while (nr_pages) { + long ret; + + ret = get_user_pages(start, nr_pages, gup_flags, NULL, NULL); + if (ret < 0) { + err = (int)ret; + break; + } + if (ret == 0) + ret = 1; /* skip this page */ + + start += ret << PAGE_SHIFT; + nr_pages -= ret; + } + mmap_read_unlock(current->mm); + + return err; +} + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. @@ -796,6 +823,14 @@ i915_gem_userptr_ioctl(struct drm_device *dev, if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) return -EFAULT;
+ if (i915_modparams.gem_userptr_prefault) { + ret = i915_gem_userptr_prefault((unsigned long)args->user_ptr, + args->user_size >> PAGE_SHIFT, + args->flags & I915_USERPTR_READ_ONLY); + if (ret) + return ret; + } + if (args->flags & I915_USERPTR_READ_ONLY) { /* * On almost all of the older hw, we cannot tell the GPU that diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7f139ea4a90b..b5e0a88c059f 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -197,6 +197,9 @@ i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif
+i915_param_named(gem_userptr_prefault, bool, 0600, + "Prefault pages when userptr GEM object is created (default: false)"); + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 330c03e2b4f7..323f60298b05 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -79,6 +79,7 @@ struct drm_printer; param(bool, disable_display, false, 0400) \ param(bool, verbose_state_checks, true, 0) \ param(bool, nuclear_pageflip, false, 0400) \ + param(bool, gem_userptr_prefault, false, 0600) \ param(bool, enable_dp_mst, true, 0600) \ param(bool, enable_gvt, false, 0400)
Quoting Jinoh Kang (2021-01-15 16:23:31)
If GUP-ineligible pages are passed to a GEM userptr object, -EFAULT is returned only when the object is actually bound.
The xf86-video-intel userspace driver cannot differentiate this condition, and marks the GPU as wedged.
The idea was to call gem_set_domain on the object to validate the pages after creation. I only did that for read-only... I did however make mesa use set-domain for validation.
As a question how are you getting to call userptr on something that wasn't passed by SHM ipc?
This not only disables graphics acceleration but may also cripple other functions such as VT switch.
That should be a non-sequitur; certainly VT switch works without ever using the GPU.
Solve this by "prefaulting" user pages on GEM object creation, testing whether all pages are eligible for get_user_pages() in the process. On failure, return -EFAULT so that userspace can fallback to software blitting.
See https://patchwork.freedesktop.org/series/33449/ for adding PROBE | POPULATE flags.
But we can just use set-domain. -Chris
On 1/15/21 4:56 PM, Chris Wilson wrote:
Quoting Jinoh Kang (2021-01-15 16:23:31)
If GUP-ineligible pages are passed to a GEM userptr object, -EFAULT is returned only when the object is actually bound.
The xf86-video-intel userspace driver cannot differentiate this condition, and marks the GPU as wedged.
The idea was to call gem_set_domain on the object to validate the pages after creation. I only did that for read-only... I did however make mesa use set-domain for validation.
Thanks for the info!
As a question how are you getting to call userptr on something that wasn't passed by SHM ipc?
Basically XShmAttachFd, which is not exposed on libX11.
This not only disables graphics acceleration but may also cripple other functions such as VT switch.
That should be a non-sequitur; certainly VT switch works without ever using the GPU.
Not that VT switch doesn't work; rather, there's some heavy graphic artifacts such as blank rectangles or part of window going completely transparent. I suppose that's another issue.
Solve this by "prefaulting" user pages on GEM object creation, testing whether all pages are eligible for get_user_pages() in the process. On failure, return -EFAULT so that userspace can fallback to software blitting.
See https://patchwork.freedesktop.org/series/33449/ for adding PROBE | POPULATE flags.
But we can just use set-domain.
So this patch was unnecessary. Thanks for the pointer as to how to patch the userspace.
-Chris
Quoting Chris Wilson (2021-01-15 16:56:42)
Quoting Jinoh Kang (2021-01-15 16:23:31)
If GUP-ineligible pages are passed to a GEM userptr object, -EFAULT is returned only when the object is actually bound.
The xf86-video-intel userspace driver cannot differentiate this condition, and marks the GPU as wedged.
The idea was to call gem_set_domain on the object to validate the pages after creation. I only did that for read-only... I did however make mesa use set-domain for validation.
Hmm, I remember a reason why we wanted to be lazy in populating the pages was that we would often be called to create a map that was never used. So the additional gup + bind was measurable, and we would rather just have a probe. -Chris
On 1/15/21 5:07 PM, Chris Wilson wrote:
Quoting Chris Wilson (2021-01-15 16:56:42)
Quoting Jinoh Kang (2021-01-15 16:23:31)
If GUP-ineligible pages are passed to a GEM userptr object, -EFAULT is returned only when the object is actually bound.
The xf86-video-intel userspace driver cannot differentiate this condition, and marks the GPU as wedged.
The idea was to call gem_set_domain on the object to validate the pages after creation. I only did that for read-only... I did however make mesa use set-domain for validation.
Hmm, I remember a reason why we wanted to be lazy in populating the pages was that we would often be called to create a map that was never used. So the additional gup + bind was measurable, and we would rather just have a probe. -Chris
try_upload__blt uses the map immediately, so I guess that would be an appropriate place to patch.
Basically XShmAttachFd, which is not exposed on libX11.
To clarify: privcmd pages cannot actually be passed by fd, since it's tightly bound with current->mm. There's some sysv shmop hooking hack involved, which is injected in Xorg side.
---
Besides, is there an equivalent code path that lets you eagerly *unpin* pages when closing an userptr object without waiting for the worker?
This is actually more of a problem in drivers/xen/grant-table.c side, since it hangs the current task until all page refs are released, and it didn't even use ZONE_DEVICE pages until recently (while still not using dev_pagemap_ops::page_free, since the unpopulated-alloc pgmap is not MEMORY_DEVICE_FS_DAX apparently).
(You could say that we should switch to DMA-BUF instead and that would be a valid criticism. I'm merely figuring out what the best workaround for the current status quo would be.)
I'm using something like the following: --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 8 ++++++++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 ++- drivers/gpu/drm/i915/i915_params.c | 3 +++ drivers/gpu/drm/i915/i915_params.h | 1 + 5 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 00d24000b5e8..4352a5788fd8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -167,6 +167,14 @@ static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *f i915_lut_handle_free(lut); i915_gem_object_put(obj); } + + if (i915_modparams.gem_userptr_close_immediate && + i915_gem_object_type_has(obj, I915_GEM_OBJECT_IMM_RELEASE) && + i915_gem_object_is_shrinkable(obj) && + !atomic_read(&obj->mm.shrink_pin) && + i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_TEST) == 0) + __i915_gem_object_put_pages(obj); }
static void __i915_gem_free_object_rcu(struct rcu_head *head) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e2d9b7e1e152..0ac1dfed0b91 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -36,6 +36,7 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_IS_PROXY BIT(3) #define I915_GEM_OBJECT_NO_MMAP BIT(4) #define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) +#define I915_GEM_OBJECT_IMM_RELEASE BIT(7)
/* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index f2eaed6aca3d..baa91daf43a1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -705,7 +705,8 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | I915_GEM_OBJECT_NO_MMAP | - I915_GEM_OBJECT_ASYNC_CANCEL, + I915_GEM_OBJECT_ASYNC_CANCEL | + I915_GEM_OBJECT_IMM_RELEASE, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, .dmabuf_export = i915_gem_userptr_dmabuf_export, diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7f139ea4a90b..4d056fd1b6e7 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -197,6 +197,9 @@ i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif
+i915_param_named(gem_userptr_close_immediate, bool, 0600, + "Immediately release pages when userptr GEM object is closed (default:false)"); + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 330c03e2b4f7..a94367a0345b 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -79,6 +79,7 @@ struct drm_printer; param(bool, disable_display, false, 0400) \ param(bool, verbose_state_checks, true, 0) \ param(bool, nuclear_pageflip, false, 0400) \ + param(bool, gem_userptr_close_immediate, false, 0600) \ param(bool, enable_dp_mst, true, 0600) \ param(bool, enable_gvt, false, 0400)
dri-devel@lists.freedesktop.org