In theory, if clflush_work_create() somehow fails here and we don't yet have mm.pages populated, then we end up resetting cache_dirty, which is likely wrong, since that will potentially skip the flush-on-acquire if it was needed.
It looks like intel_user_framebuffer_dirty() can arrive here before the pages are populated.
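For reference, the tail of i915_gem_clflush_object() before this patch looks roughly as follows (simplified sketch; the async setup is elided and the exact pages check is paraphrased):

        clflush = NULL;
        if (!(flags & I915_CLFLUSH_SYNC))
                clflush = clflush_work_create(obj);
        if (clflush) {
                /* async: queue the flush work behind the object's fences */
                ...
        } else if (i915_gem_object_has_pages(obj)) {
                __do_clflush(obj);
        } else {
                /* no pages yet, so nothing was actually flushed here... */
                GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
        }

        obj->cache_dirty = false;       /* ...but the flag is cleared regardless */
        return true;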
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index f0435c6feb68..d09365b5eb29 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -20,6 +20,7 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
 {
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
+	obj->cache_dirty = false;
 
 	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 }
@@ -115,6 +116,5 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
 	}
 
-	obj->cache_dirty = false;
 	return true;
 }
We seem to have an unfortunate issue where we arrive from:
i915_gem_object_flush_if_display+0x86/0xd0 [i915]
intel_user_framebuffer_dirty+0x1a/0x50 [i915]
drm_mode_dirtyfb_ioctl+0xfb/0x1b0
Which can be before the pages are populated (and pinned for display), and so i915_gem_object_has_struct_page() might still return true, as per the TTM backend. We could re-order the later get_pages() call here, but on discrete everything should already be coherent, with the exception of the display engine, and even there display surfaces must be allocated in device local-memory. So there should in theory be no conceivable reason to ever call i915_gem_clflush_object() on discrete.
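To make the ordering concrete, roughly (call names taken from the trace above; the later pin step is an assumption about the display path, not something this patch touches):

        /*
         * drm_mode_dirtyfb_ioctl()
         *   -> intel_user_framebuffer_dirty()
         *     -> i915_gem_object_flush_if_display()
         *       -> i915_gem_clflush_object()  // pages may not be populated yet
         *
         * Only later does the display path pin the framebuffer object,
         * which is what actually populates (and pins) the pages.
         */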
References: https://gitlab.freedesktop.org/drm/intel/-/issues/4320
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index d09365b5eb29..b0822fd99709 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -70,6 +70,8 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags)
 {
+
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct clflush *clflush;
 
 	assert_object_held(obj);
@@ -81,7 +83,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * anything not backed by physical memory we consider to be always
 	 * coherent and not need clflushing.
 	 */
-	if (!i915_gem_object_has_struct_page(obj)) {
+	if (!i915_gem_object_has_struct_page(obj) || IS_DGFX(i915)) {
 		obj->cache_dirty = false;
 		return false;
 	}
@@ -106,7 +108,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	if (clflush) {
 		i915_sw_fence_await_reservation(&clflush->base.chain,
 						obj->base.resv, NULL, true,
-						i915_fence_timeout(to_i915(obj->base.dev)),
+						i915_fence_timeout(i915),
 						I915_FENCE_GFP);
 		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
 		dma_fence_work_commit(&clflush->base);
On 10/21/21 13:44, Matthew Auld wrote:
We seem to have an unfortunate issue where we arrive from:
i915_gem_object_flush_if_display+0x86/0xd0 [i915]
intel_user_framebuffer_dirty+0x1a/0x50 [i915]
drm_mode_dirtyfb_ioctl+0xfb/0x1b0
Which can be before the pages are populated (and pinned for display), and so i915_gem_object_has_struct_page() might still return true, as per the TTM backend. We could re-order the later get_pages() call here, but on discrete everything should already be coherent, with the exception of the display engine, and even there display surfaces must be allocated in device local-memory. So there should in theory be no conceivable reason to ever call i915_gem_clflush_object() on discrete.
References: https://gitlab.freedesktop.org/drm/intel/-/issues/4320
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com
Move it next to its partner in crime: gpu_write_needs_clflush().
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 12 ++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 ++-------------
 drivers/gpu/drm/i915/i915_gem.c            |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index b684a62bf3b0..d30d5a699788 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -22,6 +22,18 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 		   obj->cache_level == I915_CACHE_WT);
 }
 
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	/* Currently in use by HW (display engine)? Keep flushed. */
+	return i915_gem_object_is_framebuffer(obj);
+}
+
 static void flush_write_domain(struct drm_i915_gem_object *obj,
 			       unsigned int flush_domains)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 59201801cec5..199f2ef928c3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -517,6 +517,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
@@ -535,23 +536,11 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 
-static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	if (obj->cache_dirty)
-		return false;
-
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
-	/* Currently in use by HW (display engine)? Keep flushed. */
-	return i915_gem_object_is_framebuffer(obj);
-}
-
 static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	if (cpu_write_needs_clflush(obj))
+	if (i915_gem_cpu_write_needs_clflush(obj))
 		obj->cache_dirty = true;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 981e383d1a5d..d0e642c82064 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -764,7 +764,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * perspective, requiring manual detiling by the client.
 	 */
 	if (!i915_gem_object_has_struct_page(obj) ||
-	    cpu_write_needs_clflush(obj))
+	    i915_gem_cpu_write_needs_clflush(obj))
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
 		 * textures). Fallback to the shmem path in that case.
On 10/21/21 13:44, Matthew Auld wrote:
Move it next to its partner in crime: gpu_write_needs_clflush().
A motivation in the commit message?
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
Otherwise:
Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com
Should not be needed. Even with non-coherent display, we should be using device local-memory there, and not system memory.
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 10 ++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  7 +++++--
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  |  1 +
 3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d30d5a699788..26532c07d467 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -18,18 +18,28 @@
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	if (IS_DGFX(i915))
+		return false;
+
 	return !(obj->cache_level == I915_CACHE_NONE ||
 		 obj->cache_level == I915_CACHE_WT);
 }
 
 bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 	if (obj->cache_dirty)
 		return false;
 
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
+	if (IS_DGFX(i915))
+		return false;
+
 	/* Currently in use by HW (display engine)? Keep flushed. */
 	return i915_gem_object_is_framebuffer(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 1e426a42a36c..170c74a2e46d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -114,18 +114,21 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 	obj->cache_level = cache_level;
 
 	if (cache_level != I915_CACHE_NONE)
 		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
 				       I915_BO_CACHE_COHERENT_FOR_WRITE);
-	else if (HAS_LLC(to_i915(obj->base.dev)))
+	else if (HAS_LLC(i915))
 		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
 	else
 		obj->cache_coherent = 0;
 
 	obj->cache_dirty =
-		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+		!IS_DGFX(i915);
 }
 
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..76530ca265de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 
 	/* Make the pages coherent with the GPU (flushing any swapin). */
 	if (obj->cache_dirty) {
+		WARN_ON_ONCE(IS_DGFX(i915));
 		obj->write_domain = 0;
 		if (i915_gem_object_has_struct_page(obj))
 			drm_clflush_sg(pages);
On 10/21/21 13:44, Matthew Auld wrote:
Should not be needed. Even with non-coherent display, we should be using device local-memory there, and not system memory.
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com
On 10/21/21 13:44, Matthew Auld wrote:
In theory, if clflush_work_create() somehow fails here and we don't yet have mm.pages populated, then we end up resetting cache_dirty, which is likely wrong, since that will potentially skip the flush-on-acquire if it was needed.
It looks like intel_user_framebuffer_dirty() can arrive here before the pages are populated.
Signed-off-by: Matthew Auld matthew.auld@intel.com
Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index f0435c6feb68..d09365b5eb29 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -20,6 +20,7 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
 {
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
-	obj->cache_dirty = false;
I think the guidelines are to avoid updating state in async work if at all possible, so we need to add this after __do_clflush() in the sync path and after dma_fence_work_commit() in the async path.
Will that work?
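Something like the following, perhaps (rough, untested sketch of the idea; it assumes the existing structure of i915_gem_clflush_object() from the earlier patch in this series):

        if (clflush) {
                i915_sw_fence_await_reservation(&clflush->base.chain,
                                                obj->base.resv, NULL, true,
                                                i915_fence_timeout(i915),
                                                I915_FENCE_GFP);
                dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
                dma_fence_work_commit(&clflush->base);
                /* update state under the object lock, not in the worker */
                obj->cache_dirty = false;
        } else if (i915_gem_object_has_pages(obj)) {
                __do_clflush(obj);
                /* sync path: flush done, safe to clear here */
                obj->cache_dirty = false;
        } else {
                GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
        }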
/Thomas