From: Tvrtko Ursulin tvrtko.ursulin@intel.com
A small chunk of dropped and mostly already reviewed patches (a couple need review updated due rebasing I had to do) with the goal of getting to actual fixes in the next round.
Chris Wilson (12): drm/i915: Take rcu_read_lock for querying fence's driver/timeline names drm/i915: Remove notion of GEM from i915_gem_shrinker_taints_mutex drm/i915: Lift marking a lock as used to utils drm/i915: Wrap cmpxchg64 with try_cmpxchg64() helper drm/i915/selftests: Set cache status for huge_gem_object drm/i915/selftests: Use a coherent map to setup scratch batch buffers drm/i915/selftests: Replace the unbounded set-domain with an explicit wait drm/i915/selftests: Remove redundant set-to-gtt-domain drm/i915/selftests: Replace unbound set-domain waits with explicit timeouts drm/i915/selftests: Replace an unbounded set-domain wait with a timeout drm/i915/selftests: Remove redundant set-to-gtt-domain before batch submission drm/i915/gem: Manage all set-domain waits explicitly
drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 163 +++++------------- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 6 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 14 -- drivers/gpu/drm/i915/gem/i915_gem_shrinker.h | 2 - .../gpu/drm/i915/gem/selftests/huge_pages.c | 22 +-- .../i915/gem/selftests/i915_gem_client_blt.c | 26 ++- .../i915/gem/selftests/i915_gem_coherency.c | 31 +++- .../drm/i915/gem/selftests/i915_gem_context.c | 18 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 16 -- .../drm/i915/gem/selftests/i915_gem_phys.c | 8 +- .../drm/i915/gem/selftests/igt_gem_utils.c | 3 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- .../gpu/drm/i915/gt/selftest_workarounds.c | 107 +++++------- drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 + drivers/gpu/drm/i915/i915_utils.c | 28 +++ drivers/gpu/drm/i915/i915_utils.h | 41 +++++ drivers/gpu/drm/i915/selftests/i915_vma.c | 6 - .../drm/i915/selftests/intel_memory_region.c | 7 +- 26 files changed, 240 insertions(+), 312 deletions(-)
From: Chris Wilson chris@chris-wilson.co.uk
The name very often may be freed independently of the fence, with the only protection being RCU. To be safe as we read the names, hold RCU.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala mika.kuoppala@linux.intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/i915_sw_fence.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2744558f3050..dfabf291e5cd 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -430,11 +430,13 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) if (!fence) return;
+ rcu_read_lock(); pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%ps)\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno, i915_sw_fence_debug_hint(fence)); + rcu_read_unlock();
i915_sw_fence_set_error_once(fence, -ETIMEDOUT); i915_sw_fence_complete(fence);
On Wed, May 26, 2021 at 03:14:45PM +0100, Tvrtko Ursulin wrote:
From: Chris Wilson chris@chris-wilson.co.uk
The name very often may be freed independently of the fence, with the only protection being RCU. To be safe as we read the names, hold RCU.
Yeah no.
If it's not clear why, figure it out first. -Daniel
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala mika.kuoppala@linux.intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
drivers/gpu/drm/i915/i915_sw_fence.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2744558f3050..dfabf291e5cd 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -430,11 +430,13 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) if (!fence) return;
rcu_read_lock(); pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%ps)\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno, i915_sw_fence_debug_hint(fence));
rcu_read_unlock();
i915_sw_fence_set_error_once(fence, -ETIMEDOUT); i915_sw_fence_complete(fence);
-- 2.30.2
Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
From: Chris Wilson chris@chris-wilson.co.uk
Since we dropped the use of dev->struct_mutex from inside the shrinker, we no longer include that as part of our fs_reclaim tainting. We can drop the i915 argument and rebrand it as a generic fs_reclaim tainter.
v2 (Tvrtko): * Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 14 -------------- drivers/gpu/drm/i915/gem/i915_gem_shrinker.h | 2 -- drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/i915_utils.c | 13 +++++++++++++ drivers/gpu/drm/i915/i915_utils.h | 2 ++ 6 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f4fb68e8955a..d68679a89d93 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -438,20 +438,6 @@ void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) unregister_shrinker(&i915->mm.shrinker); }
-void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, - struct mutex *mutex) -{ - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); - - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); - mutex_release(&mutex->dep_map, _RET_IP_); - - fs_reclaim_release(GFP_KERNEL); -} - #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h index 8512470f6fd6..17ad82ea961f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -27,7 +27,5 @@ unsigned long i915_gem_shrink(struct i915_gem_ww_ctx *ww, unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); void i915_gem_driver_register__shrinker(struct drm_i915_private *i915); void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915); -void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, - struct mutex *mutex);
#endif /* __I915_GEM_SHRINKER_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 9b98f9d9faa3..70d207057ce5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -156,7 +156,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) lockdep_set_subclass(&vm->mutex, subclass);
if (!intel_vm_no_concurrent_access_wa(vm->i915)) { - i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + fs_reclaim_taints_mutex(&vm->mutex); } else { /* * CHV + BXT VTD workaround use stop_machine(), diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5094be6d90f..213dcfef68b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1405,7 +1405,7 @@ void intel_gt_init_reset(struct intel_gt *gt) * within the shrinker, we forbid ourselves from performing any * fs-reclaim or taking related locks during reset. */ - i915_gem_shrinker_taints_mutex(gt->i915, >->reset.mutex); + fs_reclaim_taints_mutex(>->reset.mutex);
/* no GPU until we are ready! */ __set_bit(I915_WEDGED, >->reset.flags); diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index f9e780dee9de..90c7f0c4838c 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -114,3 +114,16 @@ void set_timer_ms(struct timer_list *t, unsigned long timeout) /* Keep t->expires = 0 reserved to indicate a canceled timer. */ mod_timer(t, jiffies + timeout ?: 1); } + +void fs_reclaim_taints_mutex(struct mutex *mutex) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + + mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); + mutex_release(&mutex->dep_map, _RET_IP_); + + fs_reclaim_release(GFP_KERNEL); +} diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f02f52ab5070..4133d5193839 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -266,6 +266,8 @@ static inline int list_is_last_rcu(const struct list_head *list, return READ_ONCE(list->next) == head; }
+void fs_reclaim_taints_mutex(struct mutex *mutex); + static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) { unsigned long j = msecs_to_jiffies(m);
From: Chris Wilson chris@chris-wilson.co.uk
After calling lock_set_subclass() the lock _must_ be used, or else lockdep's internal nr_used_locks becomes unbalanced. Extract the little utility function to i915_utils.c
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +------------ drivers/gpu/drm/i915/i915_utils.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_utils.h | 7 +++++++ 3 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3f9a811eb02b..15566819539f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -794,18 +794,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&engine->active.lock.dep_map); - lock_map_release(&engine->active.lock.dep_map); - local_irq_enable(); -#endif + mark_lock_used_irq(&engine->active.lock); }
static struct intel_context * diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 90c7f0c4838c..894de60833ec 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -127,3 +127,18 @@ void fs_reclaim_taints_mutex(struct mutex *mutex)
fs_reclaim_release(GFP_KERNEL); } + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __mark_lock_used_irq(struct lockdep_map *lock) +{ + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. + */ + local_irq_disable(); + lock_map_acquire(lock); + lock_map_release(lock); + local_irq_enable(); +} +#endif diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 4133d5193839..c3d234133da7 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -455,6 +455,13 @@ static inline bool timer_expired(const struct timer_list *t) return timer_active(t) && !timer_pending(t); }
+#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __mark_lock_used_irq(struct lockdep_map *lock); +#define mark_lock_used_irq(lock) __mark_lock_used_irq(&(lock)->dep_map) +#else +#define mark_lock_used_irq(lock) +#endif + /* * This is a lookalike for IS_ENABLED() that takes a kconfig value, * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero
From: Chris Wilson chris@chris-wilson.co.uk
Wrap cmpxchg64 with a try_cmpxchg()-esque helper. Hiding the old-value dance in the helper allows for cleaner code.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/i915_utils.h | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c3d234133da7..a42d9ddd0415 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -475,4 +475,36 @@ void __mark_lock_used_irq(struct lockdep_map *lock); */ #define IS_ACTIVE(config) ((config) != 0)
+#ifndef try_cmpxchg64 +#if IS_ENABLED(CONFIG_64BIT) +#define try_cmpxchg64(_ptr, _pold, _new) try_cmpxchg(_ptr, _pold, _new) +#else +#define try_cmpxchg64(_ptr, _pold, _new) \ +({ \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __cur = cmpxchg64(_ptr, __old, _new); \ + bool success = __cur == __old; \ + if (unlikely(!success)) \ + *_old = __cur; \ + likely(success); \ +}) +#endif +#endif + +#ifndef xchg64 +#if IS_ENABLED(CONFIG_64BIT) +#define xchg64(_ptr, _new) xchg(_ptr, _new) +#else +#define xchg64(_ptr, _new) \ +({ \ + __typeof__(_ptr) __ptr = (_ptr); \ + __typeof__(*(_ptr)) __old = *__ptr; \ + while (!try_cmpxchg64(__ptr, &__old, (_new))) \ + ; \ + __old; \ +}) +#endif +#endif + #endif /* !__I915_UTILS_H */
From: Chris Wilson chris@chris-wilson.co.uk
Set the cache coherency and status using the set-coherency helper. Otherwise, we forget to mark the new pages as cache dirty.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..33dd4e2a1010 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -171,10 +171,8 @@ huge_pages_object(struct drm_i915_private *i915, I915_BO_ALLOC_STRUCT_PAGE);
i915_gem_object_set_volatile(obj); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + __start_cpu_write(obj);
obj->mm.page_mask = page_mask;
@@ -324,10 +322,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) i915_gem_object_init(obj, &fake_ops, &lock_class, 0);
i915_gem_object_set_volatile(obj); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + __start_cpu_write(obj);
return obj; } @@ -1004,7 +1000,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
if (needs_flush & CLFLUSH_BEFORE) - drm_clflush_virt_range(ptr, PAGE_SIZE); + drm_clflush_virt_range(&ptr[dword], sizeof(val));
if (ptr[dword] != val) { pr_err("n=%lu ptr[%u]=%u, val=%u\n",
From: Chris Wilson chris@chris-wilson.co.uk
Instead of manipulating the object's cache domain, just use the device coherent map to write the batch buffer.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- .../drm/i915/gem/selftests/i915_gem_context.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index ce70d0a3afb2..3d8d5f242e34 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1622,7 +1622,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm;
- cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1658,7 +1658,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm;
- cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1707,15 +1707,17 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_vma_unpin(vma);
+ i915_request_get(rq); i915_request_add(rq);
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + i915_request_put(rq); + err = -ETIME; goto out_vm; + } + i915_request_put(rq);
- cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_vm;
From: Chris Wilson chris@chris-wilson.co.uk
After running client_blt, we flush the object by changing its domain. This causes us to wait forever instead of an bounded wait suitable for the selftest timeout. So do an explicit wait with a suitable timeout -- which in turn means we have to limit the size of the object/blit to run within reason.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- .../i915/gem/selftests/i915_gem_client_blt.c | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index d36873885cc1..baec7bd1fa53 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -23,12 +23,19 @@ static int __igt_client_fill(struct intel_engine_cs *engine) I915_RND_STATE(prng); IGT_TIMEOUT(end); u32 *vaddr; + u64 limit; int err = 0;
+ /* Try to keep the blits within the timeout */ + limit = min_t(u64, ce->vm->total >> 4, + jiffies_to_msecs(i915_selftest.timeout_jiffies) * SZ_2M); + if (!limit) + limit = SZ_4K; + intel_engine_pm_get(engine); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 sz = min_t(u64, limit, prandom_u32_state(&prng)); u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; @@ -73,13 +80,20 @@ static int __igt_client_fill(struct intel_engine_cs *engine) if (err) goto err_unpin;
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + 2 * i915_selftest.timeout_jiffies); + if (err) { + pr_err("%s fill %zxB timed out\n", + engine->name, obj->base.size); goto err_unpin; + }
- for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { + for (i = 0; + i < huge_gem_object_phys_size(obj) / sizeof(u32); + i += 17) { + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + clflush(&vaddr[i]); if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val);
From: Chris Wilson chris@chris-wilson.co.uk
Since the vma's backing store is flushed upon first creation, remove the manual calls to set-to-gtt-domain.
v2 (Tvrtko): * Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- .../gpu/drm/i915/gem/selftests/i915_gem_mman.c | 16 ---------------- drivers/gpu/drm/i915/selftests/i915_vma.c | 6 ------ 2 files changed, 22 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 05a3b29f545e..886e2446e081 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -104,14 +104,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) { - pr_err("Failed to flush to GTT write domain; err=%d\n", err); - return err; - } - page = i915_prandom_u32_max_state(npages, prng); view = compute_partial_view(obj, page, MIN_CHUNK_PAGES);
@@ -189,14 +181,6 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) { - pr_err("Failed to flush to GTT write domain; err=%d\n", err); - return err; - } - for_each_prime_number_from(page, 1, npages) { struct i915_ggtt_view view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index dd0607254a95..24a806801883 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -987,12 +987,6 @@ static int igt_vma_remapped_gtt(void *arg) u32 __iomem *map; unsigned int x, y;
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - goto out; - if (!plane_info[0].dst_stride) plane_info[0].dst_stride = *t == I915_GGTT_VIEW_ROTATED ? p->height : p->width;
From: Chris Wilson chris@chris-wilson.co.uk
Let's prefer to use explicit request tracking and bounded timeouts in our selftests.
v2 (Tvrtko): * Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- .../gpu/drm/i915/gt/selftest_workarounds.c | 107 +++++++----------- 1 file changed, 40 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 64937ec3f2dc..72553a56b225 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -93,56 +93,27 @@ reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) intel_wa_list_free(&lists->gt_wa_list); }
-static struct drm_i915_gem_object * -read_nonprivs(struct intel_context *ce) +static struct i915_request * +read_nonprivs(struct intel_context *ce, struct i915_vma *result) { struct intel_engine_cs *engine = ce->engine; const u32 base = engine->mmio_base; - struct drm_i915_gem_object *result; struct i915_request *rq; - struct i915_vma *vma; u32 srm, *cs; int err; int i;
- result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(result)) - return result; - - i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC); - - cs = i915_gem_object_pin_map_unlocked(result, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_obj; - } - memset(cs, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(result); - i915_gem_object_unpin_map(result); - - vma = i915_vma_instance(result, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_obj; - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_pin; - } + if (IS_ERR(rq)) + return rq;
- i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); + i915_vma_lock(result); + err = i915_request_await_object(rq, result->obj, true); if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + err = i915_vma_move_to_active(result, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(result); if (err) - goto err_req; + goto err_rq;
srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; if (INTEL_GEN(engine->i915) >= 8) @@ -151,28 +122,24 @@ read_nonprivs(struct intel_context *ce) cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); if (IS_ERR(cs)) { err = PTR_ERR(cs); - goto err_req; + goto err_rq; }
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { *cs++ = srm; *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); - *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = i915_ggtt_offset(result) + sizeof(u32) * i; *cs++ = 0; } intel_ring_advance(rq, cs);
+ i915_request_get(rq); i915_request_add(rq); - i915_vma_unpin(vma);
- return result; + return rq;
-err_req: +err_rq: i915_request_add(rq); -err_pin: - i915_vma_unpin(vma); -err_obj: - i915_gem_object_put(result); return ERR_PTR(err); }
@@ -203,32 +170,36 @@ print_results(const struct intel_engine_cs *engine, const u32 *results) static int check_whitelist(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - struct drm_i915_gem_object *results; - struct intel_wedge_me wedge; + struct i915_vma *result; + struct i915_request *rq; + int err = 0; u32 *vaddr; - int err; int i;
- results = read_nonprivs(ce); - if (IS_ERR(results)) - return PTR_ERR(results); - - err = 0; - i915_gem_object_lock(results, NULL); - intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ - err = i915_gem_object_set_to_cpu_domain(results, false); - - if (intel_gt_is_wedged(engine->gt)) - err = -EIO; - if (err) - goto out_put; + result = __vm_create_scratch_for_read(&engine->gt->ggtt->vm, PAGE_SIZE); + if (IS_ERR(result)) + return PTR_ERR(result);
- vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(result->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_put; }
+ memset(vaddr, 0xc5, PAGE_SIZE); + i915_gem_object_flush_map(result->obj); + + rq = read_nonprivs(ce, result); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_map; + } + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out_rq; + } + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { u32 expected = get_whitelist_reg(engine, i); u32 actual = vaddr[i]; @@ -243,10 +214,12 @@ static int check_whitelist(struct intel_context *ce) } }
- i915_gem_object_unpin_map(results); +out_rq: + i915_request_put(rq); +out_map: + i915_gem_object_unpin_map(result->obj); out_put: - i915_gem_object_unlock(results); - i915_gem_object_put(results); + i915_vma_put(result); return err; }
From: Chris Wilson chris@chris-wilson.co.uk
After the memory-region test completes, it flushes the test by calling set-to-cpu-domain. Use the igt_flush_test as it includes a timeout, recovery and reports and error for miscreant tests.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index f85fd8cbfbf5..7a3f71e83140 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -826,11 +826,10 @@ static int igt_lmem_write_cpu(void *arg) if (err) goto out_unpin;
- i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) + if (igt_flush_test(engine->i915)) { + err = -EIO; goto out_unpin; + }
count = ARRAY_SIZE(bytes); order = i915_random_order(count * count, &prng);
From: Chris Wilson chris@chris-wilson.co.uk
In construction the rpcs_query batch we know that it is device coherent and ready for execution, the set-to-gtt-domain here is redudant.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3d8d5f242e34..eed5be597eee 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -954,8 +954,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, err = i915_gem_object_lock(obj, &ww); if (!err) err = i915_gem_object_lock(rpcs, &ww); - if (!err) - err = i915_gem_object_set_to_gtt_domain(obj, false); if (!err) err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err)
From: Chris Wilson chris@chris-wilson.co.uk
Only perform the domain transition under the object lock, and push the required waits to outside the lock.
v2 (Tvrtko): * Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 163 +++++------------- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 6 + .../gpu/drm/i915/gem/selftests/huge_pages.c | 8 - .../i915/gem/selftests/i915_gem_coherency.c | 31 +++- .../drm/i915/gem/selftests/i915_gem_phys.c | 8 +- .../drm/i915/gem/selftests/igt_gem_utils.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 4 +- 12 files changed, 89 insertions(+), 165 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..e4c24558eaa8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { struct clflush *clflush;
- GEM_BUG_ON(!obj->cache_dirty); - clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); if (!clflush) return NULL; @@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
trace_i915_gem_object_clflush(obj);
- clflush = NULL; - if (!(flags & I915_CLFLUSH_SYNC)) - clflush = clflush_work_create(obj); + clflush = clflush_work_create(obj); if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, - obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), + obj->base.resv, NULL, true, 0, I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..4cd5787d1507 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -9,12 +9,10 @@
#include <linux/types.h>
-struct drm_i915_private; struct drm_i915_gem_object;
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1)
#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ccede73c6465..0926e0895ee6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (!err) err = i915_gem_object_pin_pages(obj); if (!err) { - err = i915_gem_object_set_to_cpu_domain(obj, write); + i915_gem_object_set_to_cpu_domain(obj, write); i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) { @@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (!err) err = i915_gem_object_pin_pages(obj); if (!err) { - err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_set_to_gtt_domain(obj, false); i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..39fda97c49a7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) break;
case I915_GEM_DOMAIN_CPU: - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + i915_gem_clflush_object(obj, 0); break;
case I915_GEM_DOMAIN_RENDER: @@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - if (obj->write_domain == I915_GEM_DOMAIN_WC) - return 0; - - /* Flush and acquire obj->pages so that we are coherent through - * direct access in memory with previous cached writes through - * shmemfs and that our cache domain tracking remains valid. - * For example, if the obj->filp was moved to swap without us - * being notified and releasing the pages, we would mistakenly - * continue to assume that the obj remained out of the CPU cached - * domain. - */ - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; + return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
@@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) obj->write_domain = I915_GEM_DOMAIN_WC; obj->mm.dirty = true; } - - i915_gem_object_unpin_pages(obj); - return 0; }
/** @@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - if (obj->write_domain == I915_GEM_DOMAIN_GTT) - return 0; - - /* Flush and acquire obj->pages so that we are coherent through - * direct access in memory with previous cached writes through - * shmemfs and that our cache domain tracking remains valid. - * For example, if the obj->filp was moved to swap without us - * being notified and releasing the pages, we would mistakenly - * continue to assume that the obj remained out of the CPU cached - * domain. - */ - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; + return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
@@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_vma_set_ggtt_write(vma); spin_unlock(&obj->vma.lock); } - - i915_gem_object_unpin_pages(obj); - return 0; }
/** @@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + /* + * While we track when we write though the CPU cache + * (with obj->cache_dirty), this is only a guide as we do + * not know when the CPU may have speculatively populated + * the cache. We have to invalidate such speculative cachelines + * prior to reading writes by the GPU. + */ + i915_gem_clflush_object(obj, 0); obj->read_domains |= I915_GEM_DOMAIN_CPU; }
@@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) */ if (write) __start_cpu_write(obj); - - return 0; }
/** @@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT;
- /* - * Try to flush the object off the GPU without holding the lock. - * We will repeat the flush holding the lock in the normal manner - * to catch cases where we are gazumped. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_PRIORITY | - (write_domain ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (err) - goto out; - if (i915_gem_object_is_userptr(obj)) { /* * Try to grab userptr pages, iris uses set_domain to check * userptr validity */ err = i915_gem_object_userptr_validate(obj); - if (!err) - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_PRIORITY | - (write_domain ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - goto out; + if (err) + goto out; }
/* @@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out_unpin;
if (read_domains & I915_GEM_DOMAIN_WC) - err = i915_gem_object_set_to_wc_domain(obj, write_domain); + i915_gem_object_set_to_wc_domain(obj, write_domain); else if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain); + i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain); + i915_gem_object_set_to_cpu_domain(obj, write_domain);
out_unpin: i915_gem_object_unpin_pages(obj); @@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, out_unlock: i915_gem_object_unlock(obj);
+ err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT); if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
@@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, false); - if (ret) - goto err_unpin; - else - goto out; - } + !static_cpu_has(X86_FEATURE_CLFLUSH)) + i915_gem_object_set_to_cpu_domain(obj, false); + else + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto err_unpin;
/* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, !(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE;
-out: /* return with the pages pinned */ return 0;
@@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; - else - goto out; - } + !static_cpu_has(X86_FEATURE_CLFLUSH)) + i915_gem_object_set_to_cpu_domain(obj, true); + else + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto err_unpin;
/* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). @@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, *needs_clflush |= CLFLUSH_BEFORE; }
-out: i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..40fda9e81a78 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL;
- err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return ERR_PTR(err); + i915_gem_object_set_to_gtt_domain(obj, true);
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..8bbc835e70ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
-int __must_check -i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, + bool write); +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, + bool write); +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, + bool write); struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0727d0c76aa0..b8f0413bc3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -188,6 +188,12 @@ struct drm_i915_gem_object { unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + /* + * Note cache_dirty is only a guide; we know when we have written + * through the CPU cache, but we do not know when the CPU may have + * speculatively populated the cache. Before a read via the cache + * of GPU written memory, we have to cautiously invalidate the cache. + */ unsigned int cache_dirty:1;
/** diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 33dd4e2a1010..d85ca79ac433 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce, u32 dw, u32 val) { - int err; - - i915_gem_object_lock(vma->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); - i915_gem_object_unlock(vma->obj); - if (err) - return err; - return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index e937b6629019..77ba6d1ef4e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_ALL | + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) int err;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_ALL | + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) int err;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(vma);
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - if (err) - goto out_unlock; + i915_gem_object_set_to_gtt_domain(ctx->obj, false);
rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { @@ -247,7 +263,6 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) i915_request_add(rq); out_unpin: i915_vma_unpin(vma); -out_unlock: i915_gem_object_unlock(ctx->obj);
return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..4d7580762acc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -53,14 +53,10 @@ static int mock_phys_object(void *arg)
/* Make the object dirty so that put_pages must do copy back the data */ i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); - if (err) { - pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", - err); - goto out_obj; - }
+ err = 0; out_obj: i915_gem_object_put(obj); out: diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 0b092c62bb34..ba8c06778b6c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -7,6 +7,7 @@ #include "igt_gem_utils.h"
#include "gem/i915_gem_context.h" +#include "gem/i915_gem_clflush.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" @@ -138,6 +139,8 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto skip_request;
i915_vma_lock(vma); + if (vma->obj->cache_dirty & ~vma->obj->cache_coherent) + i915_gem_clflush_object(vma->obj, 0); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cffd7f4f87dc..dbb983970f34 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -301,9 +301,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, if (ret) goto err_ww;
- ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto err_ww; + i915_gem_object_set_to_gtt_domain(obj, write);
if (!i915_gem_object_is_tiled(obj)) vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
On 26/05/2021 15:14, Tvrtko Ursulin wrote:
From: Chris Wilson chris@chris-wilson.co.uk
Only perform the domain transition under the object lock, and push the required waits to outside the lock.
v2 (Tvrtko):
- Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 163 +++++------------- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 6 + .../gpu/drm/i915/gem/selftests/huge_pages.c | 8 - .../i915/gem/selftests/i915_gem_coherency.c | 31 +++- .../drm/i915/gem/selftests/i915_gem_phys.c | 8 +- .../drm/i915/gem/selftests/igt_gem_utils.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 4 +- 12 files changed, 89 insertions(+), 165 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..e4c24558eaa8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { struct clflush *clflush;
- GEM_BUG_ON(!obj->cache_dirty);
- clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); if (!clflush) return NULL;
@@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
trace_i915_gem_object_clflush(obj);
- clflush = NULL;
- if (!(flags & I915_CLFLUSH_SYNC))
clflush = clflush_work_create(obj);
- clflush = clflush_work_create(obj); if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
i915_fence_timeout(to_i915(obj->base.dev)),
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base);obj->base.resv, NULL, true, 0, I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..4cd5787d1507 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -9,12 +9,10 @@
#include <linux/types.h>
-struct drm_i915_private; struct drm_i915_gem_object;
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1)
#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ccede73c6465..0926e0895ee6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (!err) err = i915_gem_object_pin_pages(obj); if (!err) {
err = i915_gem_object_set_to_cpu_domain(obj, write);
i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) {i915_gem_object_set_to_cpu_domain(obj, write);
@@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (!err) err = i915_gem_object_pin_pages(obj); if (!err) {
err = i915_gem_object_set_to_gtt_domain(obj, false);
i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) {i915_gem_object_set_to_gtt_domain(obj, false);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..39fda97c49a7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) break;
case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
i915_gem_clflush_object(obj, 0);
break;
case I915_GEM_DOMAIN_RENDER:
@@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) {
int ret;
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
if (obj->write_domain == I915_GEM_DOMAIN_WC)
return 0;
/* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
* For example, if the obj->filp was moved to swap without us
* being notified and releasing the pages, we would mistakenly
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
ret = i915_gem_object_pin_pages(obj);
if (ret)
return ret;
return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
@@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) obj->write_domain = I915_GEM_DOMAIN_WC; obj->mm.dirty = true; }
i915_gem_object_unpin_pages(obj);
return 0; }
/**
@@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) {
int ret;
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
if (obj->write_domain == I915_GEM_DOMAIN_GTT)
return 0;
/* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
* For example, if the obj->filp was moved to swap without us
* being notified and releasing the pages, we would mistakenly
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
ret = i915_gem_object_pin_pages(obj);
if (ret)
return ret;
return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
@@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_vma_set_ggtt_write(vma); spin_unlock(&obj->vma.lock); }
i915_gem_object_unpin_pages(obj);
return 0; }
/**
@@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) {
int ret;
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
/*
* While we track when we write though the CPU cache
* (with obj->cache_dirty), this is only a guide as we do
* not know when the CPU may have speculatively populated
* the cache. We have to invalidate such speculative cachelines
* prior to reading writes by the GPU.
*/
obj->read_domains |= I915_GEM_DOMAIN_CPU; }i915_gem_clflush_object(obj, 0);
@@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) */ if (write) __start_cpu_write(obj);
return 0; }
/**
@@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT;
- /*
* Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
- err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
- if (err)
goto out;
- if (i915_gem_object_is_userptr(obj)) { /*
*/ err = i915_gem_object_userptr_validate(obj);
- Try to grab userptr pages, iris uses set_domain to check
- userptr validity
if (!err)
err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
goto out;
if (err)
goto out;
}
/*
@@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out_unpin;
if (read_domains & I915_GEM_DOMAIN_WC)
err = i915_gem_object_set_to_wc_domain(obj, write_domain);
else if (read_domains & I915_GEM_DOMAIN_GTT)i915_gem_object_set_to_wc_domain(obj, write_domain);
err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
elsei915_gem_object_set_to_gtt_domain(obj, write_domain);
err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
i915_gem_object_set_to_cpu_domain(obj, write_domain);
out_unpin: i915_gem_object_unpin_pages(obj);
@@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, out_unlock: i915_gem_object_unlock(obj);
- err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);MAX_SCHEDULE_TIMEOUT);
@@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
else
goto out;
}
!static_cpu_has(X86_FEATURE_CLFLUSH))
i915_gem_object_set_to_cpu_domain(obj, false);
- else
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err_unpin;
/* If we're not in the cpu read domain, set ourself into the gtt
- read domain and manually flush cachelines (if required). This
@@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, !(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE;
-out: /* return with the pages pinned */ return 0;
@@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
else
goto out;
}
!static_cpu_has(X86_FEATURE_CLFLUSH))
i915_gem_object_set_to_cpu_domain(obj, true);
- else
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err_unpin;
/* If we're not in the cpu write domain, set ourself into the
- gtt write domain and manually flush cachelines (as required).
@@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, *needs_clflush |= CLFLUSH_BEFORE; }
-out: i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..40fda9e81a78 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL;
err = i915_gem_object_set_to_gtt_domain(obj, true);
if (err)
return ERR_PTR(err);
i915_gem_object_set_to_gtt_domain(obj, true);
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..8bbc835e70ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
-int __must_check -i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj,
bool write);
+void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
+void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww,bool write);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0727d0c76aa0..b8f0413bc3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -188,6 +188,12 @@ struct drm_i915_gem_object { unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
/*
* Note cache_dirty is only a guide; we know when we have written
* through the CPU cache, but we do not know when the CPU may have
* speculatively populated the cache. Before a read via the cache
* of GPU written memory, we have to cautiously invalidate the cache.
*/
unsigned int cache_dirty:1;
/**
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 33dd4e2a1010..d85ca79ac433 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce, u32 dw, u32 val) {
- int err;
- i915_gem_object_lock(vma->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
- i915_gem_object_unlock(vma->obj);
- if (err)
return err;
- return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index e937b6629019..77ba6d1ef4e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_ALL |
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
- i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) int err;
i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
- i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_ALL |
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) int err;
i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
- i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(vma);
i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- if (err)
goto out_unlock;
- i915_gem_object_set_to_gtt_domain(ctx->obj, false);
IIRC Daniel pointed out that this looks odd, since this now becomes write=false for some reason. I think keep this as write=true, since it does look like that is what gpu_set wants.
From: Chris Wilson chris@chris-wilson.co.uk
Only perform the domain transition under the object lock, and push the required waits to outside the lock.
v2 (Tvrtko): * Rebase.
v3 (Tvrtko): * Restore write to gtt domain in coherency selftest. (Matt)
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 163 +++++------------- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 6 + .../gpu/drm/i915/gem/selftests/huge_pages.c | 8 - .../i915/gem/selftests/i915_gem_coherency.c | 31 +++- .../drm/i915/gem/selftests/i915_gem_phys.c | 8 +- .../drm/i915/gem/selftests/igt_gem_utils.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 4 +- 12 files changed, 89 insertions(+), 165 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..e4c24558eaa8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { struct clflush *clflush;
- GEM_BUG_ON(!obj->cache_dirty); - clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); if (!clflush) return NULL; @@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
trace_i915_gem_object_clflush(obj);
- clflush = NULL; - if (!(flags & I915_CLFLUSH_SYNC)) - clflush = clflush_work_create(obj); + clflush = clflush_work_create(obj); if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, - obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), + obj->base.resv, NULL, true, 0, I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..4cd5787d1507 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -9,12 +9,10 @@
#include <linux/types.h>
-struct drm_i915_private; struct drm_i915_gem_object;
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1)
#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ccede73c6465..0926e0895ee6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (!err) err = i915_gem_object_pin_pages(obj); if (!err) { - err = i915_gem_object_set_to_cpu_domain(obj, write); + i915_gem_object_set_to_cpu_domain(obj, write); i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) { @@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (!err) err = i915_gem_object_pin_pages(obj); if (!err) { - err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_set_to_gtt_domain(obj, false); i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..39fda97c49a7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) break;
case I915_GEM_DOMAIN_CPU: - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + i915_gem_clflush_object(obj, 0); break;
case I915_GEM_DOMAIN_RENDER: @@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - if (obj->write_domain == I915_GEM_DOMAIN_WC) - return 0; - - /* Flush and acquire obj->pages so that we are coherent through - * direct access in memory with previous cached writes through - * shmemfs and that our cache domain tracking remains valid. - * For example, if the obj->filp was moved to swap without us - * being notified and releasing the pages, we would mistakenly - * continue to assume that the obj remained out of the CPU cached - * domain. - */ - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; + return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
@@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) obj->write_domain = I915_GEM_DOMAIN_WC; obj->mm.dirty = true; } - - i915_gem_object_unpin_pages(obj); - return 0; }
/** @@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - if (obj->write_domain == I915_GEM_DOMAIN_GTT) - return 0; - - /* Flush and acquire obj->pages so that we are coherent through - * direct access in memory with previous cached writes through - * shmemfs and that our cache domain tracking remains valid. - * For example, if the obj->filp was moved to swap without us - * being notified and releasing the pages, we would mistakenly - * continue to assume that the obj remained out of the CPU cached - * domain. - */ - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; + return;
flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
@@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_vma_set_ggtt_write(vma); spin_unlock(&obj->vma.lock); } - - i915_gem_object_unpin_pages(obj); - return 0; }
/** @@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * This function returns when the move is complete, including waiting on * flushes to occur. */ -int +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - int ret; - assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + /* + * While we track when we write though the CPU cache + * (with obj->cache_dirty), this is only a guide as we do + * not know when the CPU may have speculatively populated + * the cache. We have to invalidate such speculative cachelines + * prior to reading writes by the GPU. + */ + i915_gem_clflush_object(obj, 0); obj->read_domains |= I915_GEM_DOMAIN_CPU; }
@@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) */ if (write) __start_cpu_write(obj); - - return 0; }
/** @@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT;
- /* - * Try to flush the object off the GPU without holding the lock. - * We will repeat the flush holding the lock in the normal manner - * to catch cases where we are gazumped. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_PRIORITY | - (write_domain ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - if (err) - goto out; - if (i915_gem_object_is_userptr(obj)) { /* * Try to grab userptr pages, iris uses set_domain to check * userptr validity */ err = i915_gem_object_userptr_validate(obj); - if (!err) - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_PRIORITY | - (write_domain ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT); - goto out; + if (err) + goto out; }
/* @@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out_unpin;
if (read_domains & I915_GEM_DOMAIN_WC) - err = i915_gem_object_set_to_wc_domain(obj, write_domain); + i915_gem_object_set_to_wc_domain(obj, write_domain); else if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain); + i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain); + i915_gem_object_set_to_cpu_domain(obj, write_domain);
out_unpin: i915_gem_object_unpin_pages(obj); @@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, out_unlock: i915_gem_object_unlock(obj);
+ err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT); if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
@@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, false); - if (ret) - goto err_unpin; - else - goto out; - } + !static_cpu_has(X86_FEATURE_CLFLUSH)) + i915_gem_object_set_to_cpu_domain(obj, false); + else + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto err_unpin;
/* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, !(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE;
-out: /* return with the pages pinned */ return 0;
@@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
assert_object_held(obj);
- ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - ret = i915_gem_object_pin_pages(obj); if (ret) return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; - else - goto out; - } + !static_cpu_has(X86_FEATURE_CLFLUSH)) + i915_gem_object_set_to_cpu_domain(obj, true); + else + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto err_unpin;
/* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). @@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, *needs_clflush |= CLFLUSH_BEFORE; }
-out: i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..40fda9e81a78 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL;
- err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return ERR_PTR(err); + i915_gem_object_set_to_gtt_domain(obj, true);
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..8bbc835e70ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
-int __must_check -i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, + bool write); +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, + bool write); +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, + bool write); struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0727d0c76aa0..b8f0413bc3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -188,6 +188,12 @@ struct drm_i915_gem_object { unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + /* + * Note cache_dirty is only a guide; we know when we have written + * through the CPU cache, but we do not know when the CPU may have + * speculatively populated the cache. Before a read via the cache + * of GPU written memory, we have to cautiously invalidate the cache. + */ unsigned int cache_dirty:1;
/** diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 33dd4e2a1010..d85ca79ac433 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce, u32 dw, u32 val) { - int err; - - i915_gem_object_lock(vma->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); - i915_gem_object_unlock(vma->obj); - if (err) - return err; - return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index e937b6629019..6a5a7a7fbae2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_ALL | + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) int err = 0;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) int err;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_ALL | + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) int err;
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); + + err = i915_gem_object_wait(ctx->obj, + I915_WAIT_INTERRUPTIBLE, + HZ / 2); if (err) return err;
@@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(vma);
i915_gem_object_lock(ctx->obj, NULL); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - if (err) - goto out_unlock; + i915_gem_object_set_to_gtt_domain(ctx->obj, true);
rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { @@ -247,7 +263,6 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) i915_request_add(rq); out_unpin: i915_vma_unpin(vma); -out_unlock: i915_gem_object_unlock(ctx->obj);
return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..4d7580762acc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -53,14 +53,10 @@ static int mock_phys_object(void *arg)
/* Make the object dirty so that put_pages must do copy back the data */ i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); - if (err) { - pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", - err); - goto out_obj; - }
+ err = 0; out_obj: i915_gem_object_put(obj); out: diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 0b092c62bb34..ba8c06778b6c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -7,6 +7,7 @@ #include "igt_gem_utils.h"
#include "gem/i915_gem_context.h" +#include "gem/i915_gem_clflush.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" @@ -138,6 +139,8 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto skip_request;
i915_vma_lock(vma); + if (vma->obj->cache_dirty & ~vma->obj->cache_coherent) + i915_gem_clflush_object(vma->obj, 0); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cffd7f4f87dc..dbb983970f34 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -301,9 +301,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, if (ret) goto err_ww;
- ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto err_ww; + i915_gem_object_set_to_gtt_domain(obj, write);
if (!i915_gem_object_is_tiled(obj)) vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
On Wed, May 26, 2021 at 03:30:57PM +0100, Matthew Auld wrote:
On 26/05/2021 15:14, Tvrtko Ursulin wrote:
From: Chris Wilson chris@chris-wilson.co.uk
Only perform the domain transition under the object lock, and push the required waits to outside the lock.
Do we have actual performance data justifying this? Anything else justifying this?
If we resurrect patches, I do expect actual review happens here, and that's not even close the case for this patch here at least. I didn't bother looking at the others. -Daniel
v2 (Tvrtko):
- Rebase.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Reviewed-by: Matthew Auld matthew.auld@intel.com # v1 Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 163 +++++------------- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 6 + .../gpu/drm/i915/gem/selftests/huge_pages.c | 8 - .../i915/gem/selftests/i915_gem_coherency.c | 31 +++- .../drm/i915/gem/selftests/i915_gem_phys.c | 8 +- .../drm/i915/gem/selftests/igt_gem_utils.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 4 +- 12 files changed, 89 insertions(+), 165 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..e4c24558eaa8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { struct clflush *clflush;
- GEM_BUG_ON(!obj->cache_dirty);
- clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); if (!clflush) return NULL;
@@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, trace_i915_gem_object_clflush(obj);
- clflush = NULL;
- if (!(flags & I915_CLFLUSH_SYNC))
clflush = clflush_work_create(obj);
- clflush = clflush_work_create(obj); if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
i915_fence_timeout(to_i915(obj->base.dev)),
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base);obj->base.resv, NULL, true, 0, I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..4cd5787d1507 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -9,12 +9,10 @@ #include <linux/types.h> -struct drm_i915_private; struct drm_i915_gem_object; bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1) #endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ccede73c6465..0926e0895ee6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (!err) err = i915_gem_object_pin_pages(obj); if (!err) {
err = i915_gem_object_set_to_cpu_domain(obj, write);
i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) {i915_gem_object_set_to_cpu_domain(obj, write);
@@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (!err) err = i915_gem_object_pin_pages(obj); if (!err) {
err = i915_gem_object_set_to_gtt_domain(obj, false);
i915_gem_object_unpin_pages(obj); } if (err == -EDEADLK) {i915_gem_object_set_to_gtt_domain(obj, false);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..39fda97c49a7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) break; case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
break; case I915_GEM_DOMAIN_RENDER:i915_gem_clflush_object(obj, 0);
@@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) {
- int ret;
- assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
- if (ret)
return ret;
- if (obj->write_domain == I915_GEM_DOMAIN_WC)
return 0;
- /* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
* For example, if the obj->filp was moved to swap without us
* being notified and releasing the pages, we would mistakenly
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
return ret;
flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);return;
@@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) obj->write_domain = I915_GEM_DOMAIN_WC; obj->mm.dirty = true; }
- i915_gem_object_unpin_pages(obj);
- return 0; } /**
@@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) {
- int ret;
- assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
- if (ret)
return ret;
- if (obj->write_domain == I915_GEM_DOMAIN_GTT)
return 0;
- /* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
* For example, if the obj->filp was moved to swap without us
* being notified and releasing the pages, we would mistakenly
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
return ret;
flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);return;
@@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_vma_set_ggtt_write(vma); spin_unlock(&obj->vma.lock); }
- i915_gem_object_unpin_pages(obj);
- return 0; } /**
@@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
- This function returns when the move is complete, including waiting on
- flushes to occur.
*/ -int +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) {
- int ret;
- assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
- if (ret)
return ret;
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
/*
* While we track when we write though the CPU cache
* (with obj->cache_dirty), this is only a guide as we do
* not know when the CPU may have speculatively populated
* the cache. We have to invalidate such speculative cachelines
* prior to reading writes by the GPU.
*/
obj->read_domains |= I915_GEM_DOMAIN_CPU; }i915_gem_clflush_object(obj, 0);
@@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) */ if (write) __start_cpu_write(obj);
- return 0; } /**
@@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT;
- /*
* Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
- err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
- if (err)
goto out;
- if (i915_gem_object_is_userptr(obj)) { /*
*/ err = i915_gem_object_userptr_validate(obj);
- Try to grab userptr pages, iris uses set_domain to check
- userptr validity
if (!err)
err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
goto out;
if (err)
} /*goto out;
@@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out_unpin; if (read_domains & I915_GEM_DOMAIN_WC)
err = i915_gem_object_set_to_wc_domain(obj, write_domain);
else if (read_domains & I915_GEM_DOMAIN_GTT)i915_gem_object_set_to_wc_domain(obj, write_domain);
err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
elsei915_gem_object_set_to_gtt_domain(obj, write_domain);
err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
out_unpin: i915_gem_object_unpin_pages(obj);i915_gem_object_set_to_cpu_domain(obj, write_domain);
@@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, out_unlock: i915_gem_object_unlock(obj);
- err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);MAX_SCHEDULE_TIMEOUT);
@@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
- if (ret)
return ret;
- ret = i915_gem_object_pin_pages(obj); if (ret) return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
else
goto out;
- }
!static_cpu_has(X86_FEATURE_CLFLUSH))
i915_gem_object_set_to_cpu_domain(obj, false);
- else
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
- if (ret)
/* If we're not in the cpu read domain, set ourself into the gttgoto err_unpin;
- read domain and manually flush cachelines (if required). This
@@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, !(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE; -out: /* return with the pages pinned */ return 0; @@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
- if (ret)
return ret;
- ret = i915_gem_object_pin_pages(obj); if (ret) return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
else
goto out;
- }
!static_cpu_has(X86_FEATURE_CLFLUSH))
i915_gem_object_set_to_cpu_domain(obj, true);
- else
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
- if (ret)
/* If we're not in the cpu write domain, set ourself into thegoto err_unpin;
- gtt write domain and manually flush cachelines (as required).
@@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, *needs_clflush |= CLFLUSH_BEFORE; } -out: i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..40fda9e81a78 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL;
err = i915_gem_object_set_to_gtt_domain(obj, true);
if (err)
return ERR_PTR(err);
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE |i915_gem_object_set_to_gtt_domain(obj, true);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..8bbc835e70ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); -int __must_check -i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj,
bool write);
+void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
+void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww,bool write);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0727d0c76aa0..b8f0413bc3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -188,6 +188,12 @@ struct drm_i915_gem_object { unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
- /*
* Note cache_dirty is only a guide; we know when we have written
* through the CPU cache, but we do not know when the CPU may have
* speculatively populated the cache. Before a read via the cache
* of GPU written memory, we have to cautiously invalidate the cache.
unsigned int cache_dirty:1; /***/
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 33dd4e2a1010..d85ca79ac433 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce, u32 dw, u32 val) {
- int err;
- i915_gem_object_lock(vma->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
- i915_gem_object_unlock(vma->obj);
- if (err)
return err;
- return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index e937b6629019..77ba6d1ef4e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) int err = 0; i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_ALL |
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) int err = 0; i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
- i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) int err; i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
- i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_ALL |
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) int err; i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
- i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj);
- err = i915_gem_object_wait(ctx->obj,
I915_WAIT_INTERRUPTIBLE,
if (err) return err;HZ / 2);
@@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(vma); i915_gem_object_lock(ctx->obj, NULL);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- if (err)
goto out_unlock;
- i915_gem_object_set_to_gtt_domain(ctx->obj, false);
IIRC Daniel pointed out that this looks odd, since this now becomes write=false for some reason. I think keep this as write=true, since it does look like that is what gpu_set wants.
dri-devel@lists.freedesktop.org