On Wed, May 26, 2021 at 03:30:57PM +0100, Matthew Auld wrote:
On 26/05/2021 15:14, Tvrtko Ursulin wrote:
From: Chris Wilson <chris@chris-wilson.co.uk>
Only perform the domain transition under the object lock, and push the required waits to outside the lock.
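In other words, callers are expected to end up with something like the following (a sketch of the intended calling pattern, assembled from the hunks below rather than lifted verbatim from any single caller):

	i915_gem_object_lock(obj, NULL);
	/* domain transition only; no blocking wait under the lock */
	i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);

	/* any required wait now happens outside the object lock */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);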
Do we have actual performance data justifying this? Or anything else justifying it?
If we resurrect patches, I do expect actual review to happen here, and that's not even close to being the case for this patch at least. I didn't bother looking at the others. -Daniel
v2 (Tvrtko):
- Rebase.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v1
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   9 +-
 drivers/gpu/drm/i915/gem/i915_gem_clflush.h   |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 163 +++++-------------
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  12 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   6 +
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |   8 -
 .../i915/gem/selftests/i915_gem_coherency.c   |  31 +++-
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   8 +-
 .../drm/i915/gem/selftests/igt_gem_utils.c    |   3 +
 drivers/gpu/drm/i915/i915_gem.c               |   4 +-
 12 files changed, 89 insertions(+), 165 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index daf9284ef1f5..e4c24558eaa8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
 {
 	struct clflush *clflush;
 
-	GEM_BUG_ON(!obj->cache_dirty);
-
 	clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
 	if (!clflush)
 		return NULL;
@@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 
 	trace_i915_gem_object_clflush(obj);
 
-	clflush = NULL;
-	if (!(flags & I915_CLFLUSH_SYNC))
-		clflush = clflush_work_create(obj);
+	clflush = clflush_work_create(obj);
 	if (clflush) {
 		i915_sw_fence_await_reservation(&clflush->base.chain,
-						obj->base.resv, NULL, true,
-						i915_fence_timeout(to_i915(obj->base.dev)),
+						obj->base.resv, NULL, true, 0,
 						I915_FENCE_GFP);
 		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
 		dma_fence_work_commit(&clflush->base);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
index e6c382973129..4cd5787d1507 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -9,12 +9,10 @@
 
 #include <linux/types.h>
 
-struct drm_i915_private;
 struct drm_i915_gem_object;
 
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags);
 #define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
 
 #endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index ccede73c6465..0926e0895ee6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 	if (!err)
 		err = i915_gem_object_pin_pages(obj);
 	if (!err) {
-		err = i915_gem_object_set_to_cpu_domain(obj, write);
+		i915_gem_object_set_to_cpu_domain(obj, write);
 		i915_gem_object_unpin_pages(obj);
 	}
 	if (err == -EDEADLK) {
@@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 	if (!err)
 		err = i915_gem_object_pin_pages(obj);
 	if (!err) {
-		err = i915_gem_object_set_to_gtt_domain(obj, false);
+		i915_gem_object_set_to_gtt_domain(obj, false);
 		i915_gem_object_unpin_pages(obj);
 	}
 	if (err == -EDEADLK) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 073822100da7..39fda97c49a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		i915_gem_clflush_object(obj, 0);
 		break;
 
 	case I915_GEM_DOMAIN_RENDER:
@@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
  */
-int
+void
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	int ret;
-
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
+		return;
 
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
@@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->write_domain = I915_GEM_DOMAIN_WC;
 		obj->mm.dirty = true;
 	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
 }
 
 /**
@@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
  */
-int
+void
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	int ret;
-
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
+		return;
 
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
@@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 			i915_vma_set_ggtt_write(vma);
 		spin_unlock(&obj->vma.lock);
 	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
 }
 
 /**
@@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
  */
-int
+void
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	int ret;
-
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		/*
+		 * While we track when we write though the CPU cache
+		 * (with obj->cache_dirty), this is only a guide as we do
+		 * not know when the CPU may have speculatively populated
+		 * the cache. We have to invalidate such speculative cachelines
+		 * prior to reading writes by the GPU.
+		 */
+		i915_gem_clflush_object(obj, 0);
 		obj->read_domains |= I915_GEM_DOMAIN_CPU;
 	}
@@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	 */
 	if (write)
 		__start_cpu_write(obj);
-
-	return 0;
 }
 
 /**
@@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (!obj)
 		return -ENOENT;
 
-	/*
-	 * Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
 	if (i915_gem_object_is_userptr(obj)) {
 		/*
 		 * Try to grab userptr pages, iris uses set_domain to check
 		 * userptr validity
 		 */
 		err = i915_gem_object_userptr_validate(obj);
-		if (!err)
-			err = i915_gem_object_wait(obj,
-						   I915_WAIT_INTERRUPTIBLE |
-						   I915_WAIT_PRIORITY |
-						   (write_domain ? I915_WAIT_ALL : 0),
-						   MAX_SCHEDULE_TIMEOUT);
-		goto out;
+		if (err)
+			goto out;
 	}
 
 	/*
@@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto out_unpin;
 
 	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+		i915_gem_object_set_to_wc_domain(obj, write_domain);
 	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+		i915_gem_object_set_to_gtt_domain(obj, write_domain);
 	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+		i915_gem_object_set_to_cpu_domain(obj, write_domain);
 
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 out_unlock:
 	i915_gem_object_unlock(obj);
 
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
 	if (!err && write_domain)
 		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
@@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
+	    !static_cpu_has(X86_FEATURE_CLFLUSH))
+		i915_gem_object_set_to_cpu_domain(obj, false);
+	else
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unpin;
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
 		*needs_clflush = CLFLUSH_BEFORE;
 
-out:
 	/* return with the pages pinned */
 	return 0;
 
@@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
+	    !static_cpu_has(X86_FEATURE_CLFLUSH))
+		i915_gem_object_set_to_cpu_domain(obj, true);
+	else
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unpin;
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 		*needs_clflush |= CLFLUSH_BEFORE;
 	}
 
-out:
 	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 	obj->mm.dirty = true;
 	/* return with the pages pinned */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99..40fda9e81a78 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
-		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (err)
-			return ERR_PTR(err);
+		i915_gem_object_set_to_gtt_domain(obj, true);
 
 		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
 						  PIN_MAPPABLE |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2ebd79537aea..8bbc835e70ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
 
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj,
+				      bool write);
+void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
+				       bool write);
+void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
+				       bool write);
 
 struct i915_vma * __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     struct i915_gem_ww_ctx *ww,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 0727d0c76aa0..b8f0413bc3b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -188,6 +188,12 @@ struct drm_i915_gem_object {
 	unsigned int cache_coherent:2;
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	/*
+	 * Note cache_dirty is only a guide; we know when we have written
+	 * through the CPU cache, but we do not know when the CPU may have
+	 * speculatively populated the cache. Before a read via the cache
+	 * of GPU written memory, we have to cautiously invalidate the cache.
+	 */
 	unsigned int cache_dirty:1;
 
 	/**
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 33dd4e2a1010..d85ca79ac433 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce,
 		     u32 dw,
 		     u32 val)
 {
-	int err;
-
-	i915_gem_object_lock(vma->obj, NULL);
-	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
-	i915_gem_object_unlock(vma->obj);
-	if (err)
-		return err;
-
 	return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
 			       vma->size >> PAGE_SHIFT, val);
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index e937b6629019..77ba6d1ef4e4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	int err = 0;
 
 	i915_gem_object_lock(ctx->obj, NULL);
-	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+	i915_gem_object_set_to_gtt_domain(ctx->obj, true);
 	i915_gem_object_unlock(ctx->obj);
+
+	err = i915_gem_object_wait(ctx->obj,
+				   I915_WAIT_ALL |
+				   I915_WAIT_INTERRUPTIBLE,
+				   HZ / 2);
 	if (err)
 		return err;
@@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	int err = 0;
 
 	i915_gem_object_lock(ctx->obj, NULL);
-	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
+	i915_gem_object_set_to_gtt_domain(ctx->obj, false);
 	i915_gem_object_unlock(ctx->obj);
+
+	err = i915_gem_object_wait(ctx->obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   HZ / 2);
 	if (err)
 		return err;
@@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
 	int err;
 
 	i915_gem_object_lock(ctx->obj, NULL);
-	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
+	i915_gem_object_set_to_wc_domain(ctx->obj, true);
 	i915_gem_object_unlock(ctx->obj);
+
+	err = i915_gem_object_wait(ctx->obj,
+				   I915_WAIT_ALL |
+				   I915_WAIT_INTERRUPTIBLE,
+				   HZ / 2);
 	if (err)
 		return err;
@@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
 	int err;
 
 	i915_gem_object_lock(ctx->obj, NULL);
-	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
+	i915_gem_object_set_to_wc_domain(ctx->obj, false);
 	i915_gem_object_unlock(ctx->obj);
+
+	err = i915_gem_object_wait(ctx->obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   HZ / 2);
 	if (err)
 		return err;
@@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
 		return PTR_ERR(vma);
 
 	i915_gem_object_lock(ctx->obj, NULL);
-	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-	if (err)
-		goto out_unlock;
+	i915_gem_object_set_to_gtt_domain(ctx->obj, false);
IIRC Daniel pointed out that this looks odd, since this now becomes write=false for some reason. I think we should keep this as write=true, since that does look like what gpu_set wants.
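i.e. presumably the hunk should just keep the original direction (a sketch of the suggested fixup, not taken from the posted patch):

-	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-	if (err)
-		goto out_unlock;
+	i915_gem_object_set_to_gtt_domain(ctx->obj, true);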