This patch series re-work a few i915 functions to use drm_clflush_virt_range instead of calling clflush or clflushopt directly. This will prevent errors when building for non-x86 architectures.
v2: s/PAGE_SIZE/sizeof(value) for Re-work intel_write_status_page and added more patches to convert additional clflush/clflushopt to use drm_clflush*. (Michael Cheng)
v3: Drop invalidate_csb_entries and directly invoke drm_clflush_virt_ran
v4: Remove extra memory barriers
v5: s/cache_clflush_range/drm_clflush_virt_range
v6: Fix up "Drop invalidate_csb_entries" to use correct parameters. Also added in arm64 support for drm_clflush_virt_range.
v7: Re-order patches, and use correct macro for dcache flush for arm64.
v8: Remove ifdef for asm/cacheflush.
v9: Rebased
v10: Replaced asm/cacheflush with linux/cacheflush
v11: Correctly get the sizeof certian addresses. Also rebased to the latest.
v12: Drop include of cacheflush.h and use caches_clean_inval_pou instead of dcache_clean_inval_poc, since it is not exported for other modules to use.
v13: Drop arm64 implementation for drm_clflush_virt_range. This series will focus more on making i915 more architecture neutral by abstracting all clflush and clflush opt to the drm layer.
Michael Cheng (5): drm/i915/gt: Re-work intel_write_status_page drm/i915/gt: Drop invalidate_csb_entries drm/i915/gt: Re-work reset_csb drm/i915/: Re-work clflush_write32 drm/i915/gt: replace cache_clflush_range
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++----- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 12 +++++------ drivers/gpu/drm/i915/gt/intel_engine.h | 13 ++++-------- .../drm/i915/gt/intel_execlists_submission.c | 20 +++++++------------ drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 7 files changed, 23 insertions(+), 36 deletions(-)
Re-work intel_write_status_page to use drm_clflush_virt_range. This will prevent compiler errors when building for non-x86 architectures.
Signed-off-by: Michael Cheng michael.cheng@intel.com Reviewed-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 1c0ab05c3c40..1431f1e9dbee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -4,6 +4,7 @@
#include <asm/cacheflush.h> #include <drm/drm_util.h> +#include <drm/drm_cache.h>
#include <linux/hashtable.h> #include <linux/irq_work.h> @@ -143,15 +144,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * of extra paranoia to try and ensure that the HWS takes the value * we give and that it doesn't end up trapped inside the CPU! */ - if (static_cpu_has(X86_FEATURE_CLFLUSH)) { - mb(); - clflush(&engine->status_page.addr[reg]); - engine->status_page.addr[reg] = value; - clflush(&engine->status_page.addr[reg]); - mb(); - } else { - WRITE_ONCE(engine->status_page.addr[reg], value); - } + drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value)); + WRITE_ONCE(engine->status_page.addr[reg], value); + drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value)); }
/*
Drop invalidate_csb_entries and directly call drm_clflush_virt_range. This allows for one less function call, and prevent complier errors when building for non-x86 architectures.
v2(Michael Cheng): Drop invalidate_csb_entries function and directly invoke drm_clflush_virt_range. Thanks to Tvrtko for the sugguestion.
v3(Michael Cheng): Use correct parameters for drm_clflush_virt_range. Thanks to Tvrtko for pointing this out.
v4(Michael Cheng): Simplify &execlists->csb_status[0] to execlists->csb_status. Thanks to Matt Roper for the suggestion.
Signed-off-by: Michael Cheng michael.cheng@intel.com Reviewed-by: Matt Roper matthew.d.roper@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e1470bb60f34..5f8cf4942f07 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1651,12 +1651,6 @@ cancel_port_requests(struct intel_engine_execlists * const execlists, return inactive; }
-static void invalidate_csb_entries(const u64 *first, const u64 *last) -{ - clflush((void *)first); - clflush((void *)last); -} - /* * Starting with Gen12, the status has a new format: * @@ -2004,7 +1998,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * the wash as hardware, working or not, will need to do the * invalidation before. */ - invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); + drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
/* * We assume that any event reflects a change in context flow @@ -2788,8 +2782,9 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
/* Check that the GPU does indeed update the CSB entries! */ memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); + drm_clflush_virt_range(execlists->csb_status, + execlists->csb_size * + sizeof(execlists->csb_status));
/* Once more for luck and our trusty paranoia */ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
Use drm_clflush_virt_range instead of directly invoking clflush. This will prevent compiler errors when building for non-x86 architectures.
v2(Michael Cheng): Remove extra clflush
v3(Michael Cheng): Remove memory barrier since drm_clflush_virt_range takes care of it.
v4(Michael Cheng): Get the size of value and not the size of the pointer when passing in execlists->csb_write. Thanks to Matt Roper for pointing this out.
Signed-off-by: Michael Cheng michael.cheng@intel.com Reviewed-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 5f8cf4942f07..46a2087a66de 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2953,9 +2953,8 @@ reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) { struct intel_engine_execlists * const execlists = &engine->execlists;
- mb(); /* paranoia: read the CSB pointers from after the reset */ - clflush(execlists->csb_write); - mb(); + drm_clflush_virt_range(execlists->csb_write, + sizeof(execlists->csb_write[0]));
inactive = process_csb(engine, inactive); /* drain preemption events */
Use drm_clflush_virt_range instead of clflushopt and remove the memory barrier, since drm_clflush_virt_range takes care of that.
v2(Michael Cheng): Use sizeof(*addr) instead of sizeof(addr) to get the actual size of the page. Thanks to Matt Roper for pointing this out.
Signed-off-by: Michael Cheng michael.cheng@intel.com Reviewed-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 631bc268e7c8..42a49fd2f2ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1334,10 +1334,8 @@ static void *reloc_vaddr(struct i915_vma *vma, static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) { if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { - if (flushes & CLFLUSH_BEFORE) { - clflushopt(addr); - mb(); - } + if (flushes & CLFLUSH_BEFORE) + drm_clflush_virt_range(addr, sizeof(*addr));
*addr = value;
@@ -1349,7 +1347,7 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) * to ensure ordering of clflush wrt to the system. */ if (flushes & CLFLUSH_AFTER) - clflushopt(addr); + drm_clflush_virt_range(addr, sizeof(*addr)); } else *addr = value; }
Replace all occurrence of cache_clflush_range with drm_clflush_virt_range. This will prevent compile errors on non-x86 platforms.
Signed-off-by: Michael Cheng michael.cheng@intel.com Reviewed-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 12 ++++++------ drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f574da00eff1..c7bd5d71b03e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -454,11 +454,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, pd = pdp->entry[gen8_pd_index(idx, 2)]; }
- clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE);
return idx; } @@ -631,7 +631,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, } } while (rem >= page_size && index < I915_PDES);
- clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE);
/* * Is it safe to mark the 2M block as 64K? -- Either we have @@ -647,7 +647,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K;
/* @@ -668,7 +668,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15);
- clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); } }
@@ -722,7 +722,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
vaddr = px_vaddr(pt); vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); - clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); + drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); }
static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 46a2087a66de..68a8160ce3e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2828,7 +2828,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index aed6de2d5a79..719fd31eee80 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -298,7 +298,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count) void *vaddr = __px_vaddr(p);
memset64(vaddr, val, count); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); }
static void poison_scratch_page(struct drm_i915_gem_object *scratch) diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index d91e2beb7517..d8b94d638559 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -91,7 +91,7 @@ write_dma_entry(struct drm_i915_gem_object * const pdma, u64 * const vaddr = __px_vaddr(pdma);
vaddr[idx] = encoded_entry; - clflush_cache_range(&vaddr[idx], sizeof(u64)); + drm_clflush_virt_range(&vaddr[idx], sizeof(u64)); }
void diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9ec03234d2c2..42c9e8b7bf42 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3581,7 +3581,7 @@ static void guc_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine); }
dri-devel@lists.freedesktop.org