The Xe_HP architecture introduces compute engines as a new engine class. These compute command streamers (CCS) are similar to the render engine, except that they're intended for GPGPU usage and lack support for the 3D pipeline.
For now we're just sending some initial "under the hood" preparation for CCS engines without actually exposing them to userspace or adding them to any platform's engine list yet. There may be a few more GuC-related updates necessary before it's safe to expose them, so the actual uabi bits will come later once that's all worked out.
v2:
 - General rebase of patches from September.
 - Drop ABI bits for now; we'll make them visible to userspace later once
   all the GuC work is hammered out.
v3:
 - Flip order of RCU_MODE patch and GuC's ADS update to enable compute
   engines. (Daniele)
 - Replace fls(CCS_MASK) condition on fusing check with a simple IP
   version test to omit pre-Xe_HP platforms.
 - Fix a handful of checkpatch warnings.
Daniele Ceraolo Spurio (3):
  drm/i915/xehp: compute engine pipe_control
  drm/i915/xehp/guc: enable compute engine inside GuC
  drm/i915/xehp: handle fused off CCS engines

Matt Roper (8):
  drm/i915/xehp: Define compute class and engine
  drm/i915/xehp: CCS shares the render reset domain
  drm/i915/xehp: Add Compute CS IRQ handlers
  drm/i915/xehp: CCS should use RCS setup functions
  drm/i915: Move context descriptor fields to intel_lrc.h
  drm/i915/xehp: Define context scheduling attributes in lrc descriptor
  drm/i915/xehp: Enable ccs/dual-ctx in RCU_MODE
  drm/i915/xehp: Add compute workarounds

Matthew Brost (1):
  drm/i915/xehp: Don't support parallel submission on compute / render

Srinivasan Shanmugam (1):
  drm/i915/xehpsdv: Move render/compute engine reset domains related workarounds
 drivers/gpu/drm/i915/gem/i915_gem_context.c        | 10 +++
 .../drm/i915/gem/selftests/i915_gem_context.c      |  8 +-
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c           | 34 ++++++--
 drivers/gpu/drm/i915/gt/intel_engine.h             |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c          | 83 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h       | 11 ++-
 drivers/gpu/drm/i915/gt/intel_engine_user.c        |  5 +-
 .../drm/i915/gt/intel_execlists_submission.c       | 24 +++++-
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h       | 15 ++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c             | 15 +++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h            | 45 +++-------
 drivers/gpu/drm/i915/gt/intel_lrc.c                | 12 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.h                | 51 ++++++++++++
 drivers/gpu/drm/i915/gt/intel_sseu.c               | 17 +++-
 drivers/gpu/drm/i915/gt/intel_sseu.h               |  4 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c        | 73 +++++++++++++---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c         |  5 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h        | 32 +++++--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c      | 18 +++-
 .../drm/i915/gt/uc/selftest_guc_multi_lrc.c        |  4 +
 drivers/gpu/drm/i915/i915_drv.h                    |  2 +
 drivers/gpu/drm/i915/i915_reg.h                    |  4 +
 22 files changed, 394 insertions(+), 80 deletions(-)
Introduce a Compute Command Streamer (CCS), which has access to the media and GPGPU pipelines (but not the 3D pipeline).
To begin with, define the compute class/engine common functions, based on the existing render ones.
v2:
 - Add kerneldoc for drm_i915_gem_engine_class since we're adding a new
   element to it. (Daniel)
 - Make engine class <-> guc class converters use lookup tables to make
   it more clear/explicit how the IDs map. (Tvrtko)

v3:
 - Don't update uapi for now; we'll just include the driver-internal
   changes for the time being.
Bspec: 46167, 45544 Original-author: Michel Thierry Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi rodrigo.vivi@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com #v1 --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 +++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 +++++- drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 32 ++++++++++++++------ drivers/gpu/drm/i915/i915_reg.h | 4 +++ 6 files changed, 71 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e855c801ba28..3190b7b462a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -156,6 +156,34 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } }, }, + [CCS0] = { + .class = COMPUTE_CLASS, + .instance = 0, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE } + } + }, + [CCS1] = { + .class = COMPUTE_CLASS, + .instance = 1, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE } + } + }, + [CCS2] = { + .class = COMPUTE_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE } + } + }, + [CCS3] = { + .class = COMPUTE_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE } + } + }, };
/** diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 36365bdbe1ee..f4533ccafbaf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -33,7 +33,8 @@ #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 -#define MAX_ENGINE_CLASS 4 +#define COMPUTE_CLASS 5 +#define MAX_ENGINE_CLASS 5 #define MAX_ENGINE_INSTANCE 7
#define I915_MAX_SLICES 3 @@ -95,6 +96,7 @@ struct i915_ctx_workarounds {
#define I915_MAX_VCS 8 #define I915_MAX_VECS 4 +#define I915_MAX_CCS 4
/* * Engine IDs definitions. @@ -117,6 +119,11 @@ enum intel_engine_id { VECS2, VECS3, #define _VECS(n) (VECS0 + (n)) + CCS0, + CCS1, + CCS2, + CCS3, +#define _CCS(n) (CCS0 + (n)) I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 9ce85a845105..b8c9b6b89003 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -47,6 +47,7 @@ static const u8 uabi_classes[] = { [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, + /* TODO: Add COMPUTE_CLASS mapping once ABI is available */ };
static int engine_cmp(void *priv, const struct list_head *A, @@ -139,6 +140,7 @@ const char *intel_engine_class_repr(u8 class) [COPY_ENGINE_CLASS] = "bcs", [VIDEO_DECODE_CLASS] = "vcs", [VIDEO_ENHANCEMENT_CLASS] = "vecs", + [COMPUTE_CLASS] = "ccs", };
if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class]) @@ -162,6 +164,7 @@ static int legacy_ring_idx(const struct legacy_ring *ring) [COPY_ENGINE_CLASS] = { BCS0, 1 }, [VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS }, [VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS }, + [COMPUTE_CLASS] = { CCS0, I915_MAX_CCS }, };
if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) @@ -190,7 +193,7 @@ static void add_legacy_ring(struct legacy_ring *ring, void intel_engines_driver_register(struct drm_i915_private *i915) { struct legacy_ring ring = {}; - u8 uabi_instances[4] = {}; + u8 uabi_instances[5] = {}; struct list_head *it, *next; struct rb_node **p, *prev; LIST_HEAD(engines); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d752db5669dd..530807bfe9a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1452,6 +1452,10 @@ #define GEN11_KCR (19) #define GEN11_GTPM (16) #define GEN11_BCS (15) +#define GEN12_CCS3 (7) +#define GEN12_CCS2 (6) +#define GEN12_CCS1 (5) +#define GEN12_CCS0 (4) #define GEN11_RCS0 (0) #define GEN11_VECS(x) (31 - (x)) #define GEN11_VCS(x) (x) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 6a4612a852e2..4b300b6cc0f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -46,8 +46,8 @@ #define GUC_VIDEO_CLASS 1 #define GUC_VIDEOENHANCE_CLASS 2 #define GUC_BLITTER_CLASS 3 -#define GUC_RESERVED_CLASS 4 -#define GUC_LAST_ENGINE_CLASS GUC_RESERVED_CLASS +#define GUC_COMPUTE_CLASS 4 +#define GUC_LAST_ENGINE_CLASS GUC_COMPUTE_CLASS #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32
@@ -156,23 +156,37 @@ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \ )
+/* the GuC arrays don't include OTHER_CLASS */ +static u8 engine_class_guc_class_map[] = { + [RENDER_CLASS] = GUC_RENDER_CLASS, + [COPY_ENGINE_CLASS] = GUC_BLITTER_CLASS, + [VIDEO_DECODE_CLASS] = GUC_VIDEO_CLASS, + [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS, + [COMPUTE_CLASS] = GUC_COMPUTE_CLASS, +}; + +static u8 guc_class_engine_class_map[] = { + [GUC_RENDER_CLASS] = RENDER_CLASS, + [GUC_BLITTER_CLASS] = COPY_ENGINE_CLASS, + [GUC_VIDEO_CLASS] = VIDEO_DECODE_CLASS, + [GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS, + [GUC_COMPUTE_CLASS] = COMPUTE_CLASS, +}; + static inline u8 engine_class_to_guc_class(u8 class) { - BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS); - BUILD_BUG_ON(GUC_BLITTER_CLASS != COPY_ENGINE_CLASS); - BUILD_BUG_ON(GUC_VIDEO_CLASS != VIDEO_DECODE_CLASS); - BUILD_BUG_ON(GUC_VIDEOENHANCE_CLASS != VIDEO_ENHANCEMENT_CLASS); + BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS + 1); GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS);
- return class; + return engine_class_guc_class_map[class]; }
static inline u8 guc_class_to_engine_class(u8 guc_class) { + BUILD_BUG_ON(ARRAY_SIZE(guc_class_engine_class_map) != GUC_LAST_ENGINE_CLASS + 1); GEM_BUG_ON(guc_class > GUC_LAST_ENGINE_CLASS); - GEM_BUG_ON(guc_class == GUC_RESERVED_CLASS);
- return guc_class; + return guc_class_engine_class_map[guc_class]; }
/* Work item for submitting workloads into work queue of GuC. */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a2f36ef91cec..6db8c426daa1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -971,6 +971,10 @@ #define GEN11_VEBOX2_RING_BASE 0x1d8000 #define XEHP_VEBOX3_RING_BASE 0x1e8000 #define XEHP_VEBOX4_RING_BASE 0x1f8000 +#define GEN12_COMPUTE0_RING_BASE 0x1a000 +#define GEN12_COMPUTE1_RING_BASE 0x1c000 +#define GEN12_COMPUTE2_RING_BASE 0x1e000 +#define GEN12_COMPUTE3_RING_BASE 0x26000 #define BLT_RING_BASE 0x22000
The reset domain is shared between render and all compute engines, so resetting one will affect the others.
Note: Before performing a reset on an RCS or CCS engine, the GuC will attempt to preempt-to-idle the other non-hung RCS/CCS engines to avoid impacting other clients (since some shared modules will be reset). If other engines are executing non-preemptable workloads, the impact is unavoidable and some work may be lost.
Bspec: 52549 Original-author: Michel Thierry Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3190b7b462a9..3150c0847f65 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -341,6 +341,10 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id) [VECS1] = GEN11_GRDOM_VECS2, [VECS2] = GEN11_GRDOM_VECS3, [VECS3] = GEN11_GRDOM_VECS4, + [CCS0] = GEN11_GRDOM_RENDER, + [CCS1] = GEN11_GRDOM_RENDER, + [CCS2] = GEN11_GRDOM_RENDER, + [CCS3] = GEN11_GRDOM_RENDER, }; GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || !engine_reset_domains[id]);
Add execlists and GuC interrupts for compute CS into existing IRQ handlers.
All compute command streamers belong to the same compute class, so the only change needed to enable their interrupts is to program their GT engine interrupt mask registers.
CCS0 shares the register with CCS1, while CCS2 and CCS3 are in a new one.
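For reference, the mask values used in the hunks below are built the same way as for the other engines in gen11_gt_irq_postinstall(); a rough sketch of that context (the exact interrupt bit list here is illustrative, not copied from the patch):

	const u32 irqs = GT_RENDER_USER_INTERRUPT |
			 GT_CONTEXT_SWITCH_INTERRUPT |
			 GT_WAIT_SEMAPHORE_INTERRUPT;
	const u32 dmask = irqs << 16 | irqs;	/* irq bits in both 16-bit halves */
	const u32 smask = irqs << 16;		/* only one half carries an engine/class */

	/* each mask register carries two engine instances, one per half */
	intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask);
	intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);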
BSpec: 50844, 54029, 54030, 53223, 53224. Original-author: Michel Thierry Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 15 ++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 983264e10e0a..e443ac4c8059 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -100,7 +100,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) if (unlikely(!intr)) return;
- if (class <= COPY_ENGINE_CLASS) + if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS) return gen11_engine_irq_handler(gt, class, instance, intr);
if (class == OTHER_CLASS) @@ -182,6 +182,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt) /* Disable RCS, BCS, VCS and VECS class engines. */ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); + if (CCS_MASK(gt)) + intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); @@ -195,6 +197,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0); + if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1)) + intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0); + if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) + intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -225,6 +231,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) /* Enable RCS, BCS, VCS and VECS class interrupts. */ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask); intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); + if (CCS_MASK(gt)) + intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); @@ -238,6 +246,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1)) + intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) + intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask); + /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 530807bfe9a0..69b826a3c381 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1468,6 +1468,7 @@ #define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) #define GEN11_CRYPTO_RSVD_INTR_ENABLE _MMIO(0x190040) #define GEN11_GUNIT_CSME_INTR_ENABLE _MMIO(0x190044) +#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048)
#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) #define GEN11_INTR_DATA_VALID (1 << 31) @@ -1493,6 +1494,8 @@ #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) #define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4) +#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100) +#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104)
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d26a3eaee270..457bc1993d19 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1249,6 +1249,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) #define VEBOX_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) +#define CCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS)
/* * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
CCS will reuse the RCS functions for breadcrumb and flush emission. However, CCS pipe_control has additional programming restrictions:

 - Command Streamer Stall Enable must always be set
 - Post Sync Operations must not be set to Write PS Depth Count
 - 3D-related bits must not be set
v2: - Drop unwanted blank line. (Lucas)
Bspec: 47112 Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 34 ++++++++++++++------ drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 15 +++++++++ 2 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 1f8cf4f790b2..b1b9c3fd7bf9 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -201,6 +201,8 @@ static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { + struct intel_engine_cs *engine = rq->engine; + if (mode & EMIT_FLUSH) { u32 flags = 0; u32 *cs; @@ -219,6 +221,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CS_STALL;
+ if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -246,6 +251,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CS_STALL;
+ if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = intel_ring_begin(rq, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -618,19 +626,27 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + struct drm_i915_private *i915 = rq->engine->i915; + u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + /* Wa_1409600907 */ + flags |= PIPE_CONTROL_DEPTH_STALL; + + if (rq->engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + flags);
return gen12_emit_fini_breadcrumb_tail(rq, cs); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index f8253012d166..d112ffd56418 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -228,11 +228,14 @@ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */ #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */ #define PIPE_CONTROL_FLUSH_L3 (1<<27) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) /* gen12+ */ #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_PSD_SYNC (1<<17) /* gen11+ */ #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) @@ -254,6 +257,18 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+/* 3D-related flags can't be set on compute engine */ +#define PIPE_CONTROL_3D_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + #define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) #define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) /* Opcodes for MI_MATH_INSTR */
The compute engine handles the same commands the render engine can (except for the 3D pipeline), so CCS is more similar to RCS than to the other non-render engines.
The CCS context state (lrc) is also similar to the render one, so reuse it. Note that the compute engine has its own CTX_R_PWR_CLK_STATE register.
In order to avoid having multiple RCS && CCS checks, add the following engine flag:

 - I915_ENGINE_HAS_RCS_REG_STATE - use the render (larger) reg state ctx.
BSpec: 46260 Original-author: Michel Thierry Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 8 +++++--- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index bd60d42238fb..7609db87df05 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -885,7 +885,9 @@ static int igt_shared_ctx_exec(void *arg) return err; }
-static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) +static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, + struct i915_vma *vma, + struct intel_engine_cs *engine) { u32 *cmd;
@@ -896,7 +898,7 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *v return PTR_ERR(cmd);
*cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE)); + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); *cmd++ = lower_32_bits(vma->node.start); *cmd++ = upper_32_bits(vma->node.start); *cmd = MI_BATCH_BUFFER_END; @@ -957,7 +959,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto err_vma;
- err = rpcs_query_batch(rpcs, vma); + err = rpcs_query_batch(rpcs, vma, ce->engine); if (err) goto err_batch;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3150c0847f65..edba18c942cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -208,6 +208,8 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
switch (class) { + case COMPUTE_CLASS: + fallthrough; case RENDER_CLASS: switch (GRAPHICS_VER(gt->i915)) { default: @@ -431,6 +433,10 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0;
+ /* features common between engines sharing EUs */ + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) + engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->defaults = engine->props; /* never to change again */
engine->context_size = intel_engine_context_size(gt, engine->class); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f4533ccafbaf..5fa5f21bbf2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -524,6 +524,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) +#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) unsigned int flags;
/* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 961d795220a3..47fca5ebfa76 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3480,7 +3480,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine);
- if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine);
lrc_init_wa_ctx(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 004e1216e654..d333400d29fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -623,7 +623,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 && !intel_engine_has_relative_mmio(engine));
- if (engine->class == RENDER_CLASS) { + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -1619,7 +1619,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) unsigned int i; int err;
- if (engine->class != RENDER_CLASS) + if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) return;
switch (GRAPHICS_VER(engine->i915)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b0de9110884a..891b98236155 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3776,7 +3776,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc_default_irqs(engine); guc_init_breadcrumbs(engine);
- if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
This is a more appropriate header for these definitions.
v2: - Cleanup whitespace. (Lucas)
Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 34 ----------------------- drivers/gpu/drm/i915/gt/intel_lrc.h | 34 +++++++++++++++++++++++ 3 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index edba18c942cf..b0982a9e4476 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,6 +21,7 @@ #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_gt_pm.h" +#include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 69b826a3c381..84f189738a68 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1499,38 +1499,4 @@
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
-enum { - INTEL_ADVANCED_CONTEXT = 0, - INTEL_LEGACY_32B_CONTEXT, - INTEL_ADVANCED_AD_CONTEXT, - INTEL_LEGACY_64B_CONTEXT -}; - -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; - -#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) -#define GEN8_CTX_VALID (1 << 0) -#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) -#define GEN8_CTX_FORCE_RESTORE (1 << 2) -#define GEN8_CTX_L3LLC_COHERENT (1 << 5) -#define GEN8_CTX_PRIVILEGE (1 << 8) -#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 -#define GEN11_SW_CTX_ID_SHIFT 37 -#define GEN11_SW_CTX_ID_WIDTH 11 -#define GEN11_ENGINE_CLASS_SHIFT 61 -#define GEN11_ENGINE_CLASS_WIDTH 3 -#define GEN11_ENGINE_INSTANCE_SHIFT 48 -#define GEN11_ENGINE_INSTANCE_WIDTH 6 -#define XEHP_SW_CTX_ID_SHIFT 39 -#define XEHP_SW_CTX_ID_WIDTH 16 -#define XEHP_SW_COUNTER_SHIFT 58 -#define XEHP_SW_COUNTER_WIDTH 6 - #endif /* __INTEL_GT_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 0b76f096b559..bb0e6c5b9922 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -69,4 +69,38 @@ void lrc_check_regs(const struct intel_context *ce,
void lrc_update_runtime(struct intel_context *ce);
+enum { + INTEL_ADVANCED_CONTEXT = 0, + INTEL_LEGACY_32B_CONTEXT, + INTEL_ADVANCED_AD_CONTEXT, + INTEL_LEGACY_64B_CONTEXT +}; + +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) +#define GEN8_CTX_VALID (1 << 0) +#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) +#define GEN8_CTX_FORCE_RESTORE (1 << 2) +#define GEN8_CTX_L3LLC_COHERENT (1 << 5) +#define GEN8_CTX_PRIVILEGE (1 << 8) +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 +#define XEHP_SW_COUNTER_SHIFT 58 +#define XEHP_SW_COUNTER_WIDTH 6 + #endif /* __INTEL_LRC_H__ */
In Dual Context mode the EUs are shared between render and compute command streamers. The hardware provides a field in the lrc descriptor to indicate the prioritization of the thread dispatch associated with the corresponding context.
The descriptor's priority field defaults to 'low' when the context is created; at submission time the existing i915 context priority is used to raise it to low/normal/high as appropriate.
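For reference, the encoding that mapping produces; the numbers follow directly from GEN12_CTX_PRIORITY_MASK = GENMASK(10, 9) added below, and this is just an illustrative expansion:

	/*
	 * lrc descriptor bits 10:9 (illustrative expansion of the
	 * FIELD_PREP() definitions below):
	 *
	 *   GEN12_CTX_PRIORITY_LOW    = FIELD_PREP(GENMASK(10, 9), 0) = 0x000
	 *   GEN12_CTX_PRIORITY_NORMAL = FIELD_PREP(GENMASK(10, 9), 1) = 0x200
	 *   GEN12_CTX_PRIORITY_HIGH   = FIELD_PREP(GENMASK(10, 9), 2) = 0x400
	 *
	 * execlists_update_context() ORs one of these into the descriptor
	 * for each request on engines with I915_ENGINE_HAS_EU_PRIORITY set.
	 */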
Bspec: 46145, 46260 Original-author: Michel Thierry Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Prasad Nallani prasad.nallani@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + .../drm/i915/gt/intel_execlists_submission.c | 6 +++++- drivers/gpu/drm/i915/gt/intel_lrc.h | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b0982a9e4476..2136c56d3abc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -435,8 +435,10 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->props.preempt_timeout_ms = 0;
/* features common between engines sharing EUs */ - if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; + }
engine->defaults = engine->props; /* never to change again */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5fa5f21bbf2d..19ff8758e34d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -525,6 +525,7 @@ struct intel_engine_cs { #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) +#define I915_ENGINE_HAS_EU_PRIORITY BIT(10) unsigned int flags;
/* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 47fca5ebfa76..c8407cc96c42 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -665,9 +665,13 @@ static inline void execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc.desc; + u64 desc; u32 tail, prev;
+ desc = ce->lrc.desc; + if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + desc |= lrc_desc_priority(rq_prio(rq)); + /* * WaIdleLiteRestore:bdw,skl * diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index bb0e6c5b9922..6e4f9f58fca5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -6,6 +6,9 @@ #ifndef __INTEL_LRC_H__ #define __INTEL_LRC_H__
+#include "i915_priolist_types.h" + +#include <linux/bitfield.h> #include <linux/types.h>
struct drm_i915_gem_object; @@ -90,6 +93,10 @@ enum { #define GEN8_CTX_L3LLC_COHERENT (1 << 5) #define GEN8_CTX_PRIVILEGE (1 << 8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN12_CTX_PRIORITY_MASK GENMASK(10, 9) +#define GEN12_CTX_PRIORITY_HIGH FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 2) +#define GEN12_CTX_PRIORITY_NORMAL FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 1) +#define GEN12_CTX_PRIORITY_LOW FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 0) #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 #define GEN11_SW_CTX_ID_SHIFT 37 @@ -103,4 +110,14 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6
+static inline u32 lrc_desc_priority(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + #endif /* __INTEL_LRC_H__ */
We have to specify in the Render Control Unit Mode register when CCS is enabled.
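RCU_MODE is a masked register: the upper 16 bits of the written value select which of the lower 16 bits the hardware actually updates, which is why the write below uses _MASKED_BIT_ENABLE() rather than a read-modify-write. A simplified sketch of how the existing i915 helpers expand (illustrative; the real definitions add extra type and size checks):

	#define REG_BIT(n)			(1U << (n))
	#define _MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
	#define _MASKED_BIT_ENABLE(a)		_MASKED_FIELD((a), (a))
	#define _MASKED_BIT_DISABLE(a)		_MASKED_FIELD((a), 0)

	/* 0x00010001: mask bit 16 selects bit 0, value bit 0 enables CCS */
	intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
			   _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));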
v2:
 - Move RCU_MODE programming to a helper function. (Tvrtko)
 - Clean up and clarify comments. (Tvrtko)
 - Add RCU_MODE to the GuC save/restore list. (Daniele)

v3:
 - Move this patch before the GuC ADS update to enable compute engines;
   the definition of RCU_MODE and its insertion into the save/restore
   list moves to this patch. (Daniele)
Bspec: 46034 Original-author: Michel Thierry Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +++++++++++++++++ .../drm/i915/gt/intel_execlists_submission.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 4 ++++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++++++++++ 6 files changed, 58 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index be4b1e65442f..1c0ab05c3c40 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -265,6 +265,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
void intel_engine_destroy_pinned_context(struct intel_context *ce);
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine); + #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2136c56d3abc..92f4cf9833ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -2070,6 +2070,23 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) return active; }
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine) +{ + /* + * If there are any non-fused-off CCS engines, we need to enable CCS + * support in the RCU_MODE register. This only needs to be done once, + * so for simplicity we'll take care of this in the RCS engine's + * resume handler; since the RCS and all CCS engines belong to the + * same reset domain and are reset together, this will also take care + * of re-applying the setting after i915-triggered resets. + */ + if (!CCS_MASK(engine->gt)) + return; + + intel_uncore_write(engine->uncore, GEN12_RCU_MODE, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "mock_engine.c" #include "selftest_engine.c" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c8407cc96c42..574c0542c92f 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2914,6 +2914,19 @@ static int execlists_resume(struct intel_engine_cs *engine) return 0; }
+static int gen12_rcs_resume(struct intel_engine_cs *engine) +{ + int ret; + + ret = execlists_resume(engine); + if (ret) + return ret; + + xehp_enable_ccs_engines(engine); + + return 0; +} + static void execlists_reset_prepare(struct intel_engine_cs *engine) { ENGINE_TRACE(engine, "depth<-%d\n", @@ -3468,6 +3481,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; break; } + + if (engine->class == RENDER_CLASS) + engine->resume = gen12_rcs_resume; }
int intel_execlists_submission_setup(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 84f189738a68..e629443e07ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1327,6 +1327,9 @@ #define ECOBITS_PPGTT_CACHE64B (3 << 8) #define ECOBITS_PPGTT_CACHE4B (0 << 8)
+#define GEN12_RCU_MODE _MMIO(0x14800) +#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 847e00390b00..29fbe4681ca7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -335,6 +335,10 @@ static int guc_mmio_regset_init(struct temp_regset *regset, ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+ if (engine->class == RENDER_CLASS && + CCS_MASK(engine->gt)) + ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 891b98236155..7e248e2001de 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3603,6 +3603,19 @@ static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) return !sched_engine->tasklet.callback; }
+static int gen12_rcs_resume(struct intel_engine_cs *engine) +{ + int ret; + + ret = guc_resume(engine); + if (ret) + return ret; + + xehp_enable_ccs_engines(engine); + + return 0; +} + static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; @@ -3723,6 +3736,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; break; } + + if (engine->class == RENDER_CLASS) + engine->resume = gen12_rcs_resume; }
static inline void guc_default_irqs(struct intel_engine_cs *engine)
On Tue, Mar 01, 2022 at 03:15:44PM -0800, Matt Roper wrote:
We have to specify in the Render Control Unit Mode register when CCS is enabled.
[...]

+	if (engine->class == RENDER_CLASS)
+		engine->resume = gen12_rcs_resume;
Why not just have guc_resume and execlist_resume call xehp_enable_ccs_engines(engine) for render case?
Also what happens if render itself is not present/fused-off (if there is such a thing)?
Just those questions, overall the patch looks fine as is:
Reviewed-by: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com
Umesh
On Tue, Mar 01, 2022 at 03:51:21PM -0800, Umesh Nerlige Ramappa wrote:
On Tue, Mar 01, 2022 at 03:15:44PM -0800, Matt Roper wrote:
We have to specify in the Render Control Unit Mode register when CCS is enabled.
[...]
Why not just have guc_resume and execlist_resume call xehp_enable_ccs_engines(engine) for render case?
That does seem like it would be simpler; I'll make that change and resend.
Also what happens if render itself is not present/fused-off (if there is such a thing)?
There are still some patches that haven't been sent yet that switch some of the various conditions to apply to an engine with 'I915_ENGINE_FIRST_RENDER_COMPUTE' rather than always to the RCS0 engine. That will cover platforms where there is no render engine in the design, as well as platforms where the render engine is part of the architecture, but is fused off on some boards.
Matt
We have to specify in the Render Control Unit Mode register when CCS is enabled.
v2:
 - Move RCU_MODE programming to a helper function. (Tvrtko)
 - Clean up and clarify comments. (Tvrtko)
 - Add RCU_MODE to the GuC save/restore list. (Daniele)

v3:
 - Move this patch before the GuC ADS update to enable compute engines;
   the definition of RCU_MODE and its insertion into the save/restore
   list moves to this patch. (Daniele)

v4:
 - Call xehp_enable_ccs_engines() directly in guc_resume() and
   execlists_resume() rather than adding an extra layer of wrapping to
   the engine->resume() vfunc. (Umesh)
Bspec: 46034 Original-author: Michel Thierry Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +++++++++++++++++ .../drm/i915/gt/intel_execlists_submission.c | 3 +++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 4 ++++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +++ 6 files changed, 32 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index be4b1e65442f..1c0ab05c3c40 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -265,6 +265,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
void intel_engine_destroy_pinned_context(struct intel_context *ce);
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine); + #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2136c56d3abc..92f4cf9833ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -2070,6 +2070,23 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) return active; }
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine) +{ + /* + * If there are any non-fused-off CCS engines, we need to enable CCS + * support in the RCU_MODE register. This only needs to be done once, + * so for simplicity we'll take care of this in the RCS engine's + * resume handler; since the RCS and all CCS engines belong to the + * same reset domain and are reset together, this will also take care + * of re-applying the setting after i915-triggered resets. + */ + if (!CCS_MASK(engine->gt)) + return; + + intel_uncore_write(engine->uncore, GEN12_RCU_MODE, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "mock_engine.c" #include "selftest_engine.c" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c8407cc96c42..3e0c81f06bd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2911,6 +2911,9 @@ static int execlists_resume(struct intel_engine_cs *engine)
enable_execlists(engine);
+ if (engine->class == RENDER_CLASS) + xehp_enable_ccs_engines(engine); + return 0; }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 84f189738a68..e629443e07ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1327,6 +1327,9 @@ #define ECOBITS_PPGTT_CACHE64B (3 << 8) #define ECOBITS_PPGTT_CACHE4B (0 << 8)
+#define GEN12_RCU_MODE _MMIO(0x14800) +#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 847e00390b00..29fbe4681ca7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -335,6 +335,10 @@ static int guc_mmio_regset_init(struct temp_regset *regset, ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+ if (engine->class == RENDER_CLASS && + CCS_MASK(engine->gt)) + ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 891b98236155..65690d4b2266 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3595,6 +3595,9 @@ static int guc_resume(struct intel_engine_cs *engine) setup_hwsp(engine); start_engine(engine);
+ if (engine->class == RENDER_CLASS) + xehp_enable_ccs_engines(engine); + return 0; }
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
Tell GuC that CCS is enabled by setting a bit in its ADS.
Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Original-author: Michel Thierry Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 29fbe4681ca7..9bb551b83e7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -434,6 +434,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, struct iosys_map *info_map) { info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
On 3/1/2022 3:15 PM, Matt Roper wrote:
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
Tell GuC that CCS is enabled by setting a bit in its ADS.
It's a mask, not a bit.
Reviewed-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
Daniele
Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Original-author: Michel Thierry Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 29fbe4681ca7..9bb551b83e7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -434,6 +434,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, struct iosys_map *info_map) { info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
+ info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
On Tue, Mar 01, 2022 at 03:15:45PM -0800, Matt Roper wrote:
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
Tell GuC that CCS is enabled by setting a bit in its ADS.
Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Original-author: Michel Thierry Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com
Reviewed-by: Matt Roper matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 29fbe4681ca7..9bb551b83e7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -434,6 +434,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, struct iosys_map *info_map) { info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
+ info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
-- 2.34.1
From: Matthew Brost matthew.brost@intel.com
Compute / render engines require different emit-breadcrumbs ring programming, and we don't have a UMD user for it, so just reject parallel submission for these engine classes.
Signed-off-by: Matthew Brost matthew.brost@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 ++++++++++ drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c | 4 ++++ 2 files changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index bc6d59df064d..9ae294eb7fb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -670,6 +670,16 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, goto out_err; }
+ /* + * We don't support breadcrumb handshake on these + * classes + */ + if (siblings[n]->class == RENDER_CLASS || + siblings[n]->class == COMPUTE_CLASS) { + err = -EINVAL; + goto out_err; + } + if (n) { if (prev_engine.engine_class != ci.engine_class) { diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 1297ddbf7f88..812220a43df8 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -154,6 +154,10 @@ static int intel_guc_multi_lrc_basic(void *arg) int ret;
for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + /* We don't support breadcrumb handshake on these classes */ + if (class == COMPUTE_CLASS || class == RENDER_CLASS) + continue; + ret = __intel_guc_multi_lrc_basic(gt, class); if (ret) return ret;
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
HW resources are divided across the active CCS engines at the compute slice level, with each CCS having priority on one of the cslices. If a compute slice has no enabled DSS, its paired compute engine is not usable in full parallel execution because the other ones already fully saturate the HW, so consider it fused off.
v2 (José): - moved it to its own function - fixed definition of ccs_mask
v3 (Matt): - Replace fls() condition with a simple IP version test
Cc: Stuart Summers stuart.summers@intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Ashutosh Dixit ashutosh.dixit@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Stuart Summers stuart.summers@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.c | 17 ++++++++++++--- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- 3 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 92f4cf9833ee..809747c20bc4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, return false; }
+static void engine_mask_apply_compute_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = &gt->info; + int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; + const unsigned long ccs_mask = + intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), + ss_per_ccs); + unsigned int i; + + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + return; + + /* + * If all DSS in a quadrant are fused off, the corresponding CCS + * engine is not available for use. + */ + for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { + info->engine_mask &= ~BIT(_CCS(i)); + drm_dbg(&i915->drm, "ccs%u fused off\n", i); + } +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask, VEBOX_MASK(gt)); GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+ engine_mask_apply_compute_fuses(gt); + return info->engine_mask; }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 29118c652811..4ac0bbaf0c31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; }
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +static u32 +_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, + const u8 *subslice_mask, u8 slice) { int i, offset = slice * sseu->ss_stride; u32 mask = 0; @@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) GEM_BUG_ON(slice >= sseu->max_slices);
for (i = 0; i < sseu->ss_stride; i++) - mask |= (u32)sseu->subslice_mask[offset + i] << - i * BITS_PER_BYTE; + mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
return mask; }
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); +} + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) +{ + return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); +} + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 60882a74741e..8a79cd8eaab4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask);
On Tue, Mar 01, 2022 at 03:15:47PM -0800, Matt Roper wrote:
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
HW resources are divided across the active CCS engines at the compute slice level, with each CCS having priority on one of the cslices. If a compute slice has no enabled DSS, its paired compute engine is not usable in full parallel execution because the other ones already fully saturate the HW, so consider it fused off.
v2 (José):
- moved it to its own function
- fixed definition of ccs_mask
v3 (Matt):
- Replace fls() condition with a simple IP version test
Cc: Stuart Summers stuart.summers@intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Ashutosh Dixit ashutosh.dixit@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Stuart Summers stuart.summers@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com
Reviewed-by: Matt Roper matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.c | 17 ++++++++++++--- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- 3 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 92f4cf9833ee..809747c20bc4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, return false; }
+static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_gt_info *info = &gt->info;
+	int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
+	const unsigned long ccs_mask =
+		intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
+					     ss_per_ccs);
+	unsigned int i;
+
+	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+		return;
+
+	/*
+	 * If all DSS in a quadrant are fused off, the corresponding CCS
+	 * engine is not available for use.
+	 */
+	for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
+		info->engine_mask &= ~BIT(_CCS(i));
+		drm_dbg(&i915->drm, "ccs%u fused off\n", i);
+	}
+}
+
 /*
  * Determine which engines are fused off in our particular hardware.
  * Note that we have a catch-22 situation where we need to be able to access
@@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
 			  vebox_mask, VEBOX_MASK(gt));
 	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));

+	engine_mask_apply_compute_fuses(gt);
+
 	return info->engine_mask;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 29118c652811..4ac0bbaf0c31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; }
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+static u32
+_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
+			  const u8 *subslice_mask, u8 slice)
 {
 	int i, offset = slice * sseu->ss_stride;
 	u32 mask = 0;
@@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
 	GEM_BUG_ON(slice >= sseu->max_slices);

 	for (i = 0; i < sseu->ss_stride; i++)
-		mask |= (u32)sseu->subslice_mask[offset + i] <<
-			i * BITS_PER_BYTE;
+		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;

 	return mask;
 }

+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+	return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+}
+
+u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
+{
+	return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+}
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 60882a74741e..8a79cd8eaab4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask); -- 2.34.1
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
HW resources are divided across the active CCS engines at the compute slice level, with each CCS having priority on one of the cslices. If a compute slice has no enabled DSS, its paired compute engine is not usable in full parallel execution because the other ones already fully saturate the HW, so consider it fused off.
v2 (José): - moved it to its own function - fixed definition of ccs_mask
v3 (Matt): - Replace fls() condition with a simple IP version test
v4 (Matt): - Don't try to calculate a ccs_mask using intel_slicemask_from_dssmask() until we've determined that we're running on an Xe_HP platform where the logic makes sense (and won't overflow).
Cc: Stuart Summers stuart.summers@intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Ashutosh Dixit ashutosh.dixit@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Stuart Summers stuart.summers@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.c | 17 ++++++++++++--- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- 3 files changed, 42 insertions(+), 4 deletions(-)
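[Editor's note] To make the quadrant mapping described above concrete, here is a small standalone sketch of the idea. The names, the 4-cslice assumption and the mask widths are illustrative only; this is not the driver's intel_slicemask_from_dssmask() implementation.

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_CCS 4	/* assumption for this sketch: 4 compute slices */

/*
 * Group the compute DSS mask into MAX_CCS equal quadrants and report
 * which CCS engines keep at least one enabled DSS; a quadrant with no
 * enabled DSS means its paired CCS engine is treated as fused off.
 */
static uint32_t ccs_mask_from_dss_mask(uint32_t dss_mask, int ss_per_ccs)
{
	uint32_t ccs_mask = 0;

	for (int i = 0; i < MAX_CCS; i++) {
		uint32_t quadrant = (dss_mask >> (i * ss_per_ccs)) &
				    ((1u << ss_per_ccs) - 1);

		if (quadrant)	/* at least one DSS alive in this cslice */
			ccs_mask |= 1u << i;
	}

	return ccs_mask;
}

int main(void)
{
	/* e.g. 16 DSS, 4 per cslice; cslice 2 fully fused off */
	uint32_t dss_mask = 0xf0ff;

	printf("usable CCS mask: 0x%x\n", ccs_mask_from_dss_mask(dss_mask, 4));
	return 0;
}
```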
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 92f4cf9833ee..e1aa78b20d2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, return false; }
+static void engine_mask_apply_compute_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = &gt->info; + int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; + unsigned long ccs_mask; + unsigned int i; + + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + return; + + ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), + ss_per_ccs); + /* + * If all DSS in a quadrant are fused off, the corresponding CCS + * engine is not available for use. + */ + for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { + info->engine_mask &= ~BIT(_CCS(i)); + drm_dbg(&i915->drm, "ccs%u fused off\n", i); + } +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask, VEBOX_MASK(gt)); GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+ engine_mask_apply_compute_fuses(gt); + return info->engine_mask; }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 29118c652811..4ac0bbaf0c31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; }
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +static u32 +_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, + const u8 *subslice_mask, u8 slice) { int i, offset = slice * sseu->ss_stride; u32 mask = 0; @@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) GEM_BUG_ON(slice >= sseu->max_slices);
for (i = 0; i < sseu->ss_stride; i++) - mask |= (u32)sseu->subslice_mask[offset + i] << - i * BITS_PER_BYTE; + mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
return mask; }
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); +} + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) +{ + return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); +} + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 60882a74741e..8a79cd8eaab4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask);
Additional workarounds are required once we start exposing CCS engines.
Note that we have a number of workarounds that update registers in the shared render/compute reset domain. Historically we've just added such registers to the RCS engine's workaround list. But going forward we should be more careful to place such workarounds on a wa_list for an engine that definitely exists and is not fused off (e.g., a platform with no RCS would never apply the RCS wa_list). We'll keep rcs_engine_wa_init() focused on RCS-specific workarounds that only need to be applied if the RCS engine is present. A separate general_render_compute_wa_init() function will be used to define workarounds that touch registers in the shared render/compute reset domain and that we need to apply regardless of what render and/or compute engines actually exist. Any workarounds defined in this new function will internally be added to the first present RCS or CCS engine's workaround list to ensure they get applied (and only get applied once rather than being needlessly re-applied several times).
Co-author: Srinivasan Shanmugam Signed-off-by: Matt Roper matthew.d.roper@intel.com Reviewed-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 8 ++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 47 +++++++++++++++++++++ 3 files changed, 56 insertions(+)
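[Editor's note] A minimal standalone sketch of the "apply shared workarounds exactly once" ownership rule the paragraph above describes. Engine IDs and masks here are illustrative, not i915's; note that the diff below still keys the call off RENDER_CLASS, with the more general first-present-engine selection described in the commit message left for a later revision.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative engine IDs for the render/compute reset domain. */
enum { RCS0, CCS0, CCS1, CCS2, CCS3, NUM_RC_ENGINES };

/*
 * The shared wa_list is attached to the first engine of the
 * render/compute reset domain that is actually present, so a
 * configuration with no RCS still gets the workarounds and no
 * engine applies them twice.
 */
static bool engine_owns_shared_wa_list(uint32_t engine_mask, int engine)
{
	for (int i = 0; i < NUM_RC_ENGINES; i++) {
		if (!(engine_mask & (1u << i)))
			continue;	/* fused off, skip */
		return i == engine;	/* first present engine wins */
	}
	return false;
}

int main(void)
{
	uint32_t no_rcs = (1u << CCS0) | (1u << CCS2);	/* hypothetical fusing */

	printf("CCS0 owns list: %d\n", engine_owns_shared_wa_list(no_rcs, CCS0));
	printf("CCS2 owns list: %d\n", engine_owns_shared_wa_list(no_rcs, CCS2));
	return 0;
}
```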
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index e629443e07ae..19cd34f24263 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1060,6 +1060,7 @@ #define FLOW_CONTROL_ENABLE REG_BIT(15) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) +#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8) #define STALL_DOP_GATING_DISABLE REG_BIT(5) #define THROTTLE_12_5 REG_GENMASK(4, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d333400d29fe..07bef7128fdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1217,6 +1217,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_timestamp_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs);
+ /* Wa_16013000631:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(ce->engine->i915)) + if (ce->engine->class == COMPUTE_CLASS) + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, + 0); + return cs; }
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0471d475e680..0b9435d62808 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1921,6 +1921,11 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4);
break; + case COMPUTE_CLASS: + /* Wa_16011157294:dg2_g10 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); + break; default: break; } @@ -2581,6 +2586,40 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } }
+/* + * The workarounds in this function apply to shared registers in + * the general render reset domain that aren't tied to a + * specific engine. Since all render+compute engines get reset + * together, and the contents of these registers are lost during + * the shared render domain reset, we'll define such workarounds + * here and then add them to just a single RCS or CCS engine's + * workaround list (whichever engine has the XXXX flag). + */ +static void +general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_XEHPSDV(i915)) { + /* Wa_1409954639 */ + wa_masked_en(wal, + GEN8_ROW_CHICKEN, + SYSTOLIC_DOP_CLOCK_GATING_DIS); + + /* Wa_1607196519 */ + wa_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); + + /* Wa_14010670810:xehpsdv */ + wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + + /* Wa_14010449647:xehpsdv */ + wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } +} + static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2589,6 +2628,14 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
engine_fake_wa_init(engine, wal);
+ /* + * These are common workarounds that just need to applied + * to a single RCS/CCS engine's workaround list since + * they're reset as part of the general render domain reset. + */ + if (engine->class == RENDER_CLASS) + general_render_compute_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else
From: Srinivasan Shanmugam srinivasan.s@intel.com
Registers that exist in the shared render/compute reset domain need to be placed on an engine workaround list to ensure that they are properly re-applied whenever an RCS or CCS engine is reset. We have a number of workarounds (updating registers MLTICTXCTL, L3SQCREG1_CCS0, GEN12_MERT_MOD_CTRL, and GEN12_GAMCNTRL_CTRL) that are incorrectly implemented on the 'gt' workaround list and need to be moved accordingly.
Cc: Matt Roper matthew.d.roper@intel.com Signed-off-by: Srinivasan Shanmugam srinivasan.s@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0b9435d62808..c014b40d2e9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1343,12 +1343,6 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409757795:xehpsdv */ wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
- /* Wa_18011725039:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); - } - /* Wa_16011155590:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1385,19 +1379,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMTLBVEBOX0_CLKGATE_DIS); }
- /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); - /* Wa_16012725990:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
/* Wa_14011060649:xehpsdv */ wa_14011060649(gt, wal); - - /* Wa_14014368820:xehpsdv */ - wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | - GLOBAL_INVALIDATION_MODE); }
static void @@ -2617,6 +2604,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_14010449647:xehpsdv */ wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* Wa_18011725039:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { + wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + } + + /* Wa_14012362059:xehpsdv */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_14014368820:xehpsdv */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); } }
On Tue, Mar 01, 2022 at 03:15:49PM -0800, Matt Roper wrote:
From: Srinivasan Shanmugam srinivasan.s@intel.com
Registers that exist in the shared render/compute reset domain need to be placed on an engine workaround list to ensure that they are properly re-applied whenever an RCS or CCS engine is reset. We have a number of workarounds (updating registers MLTICTXCTL, L3SQCREG1_CCS0, GEN12_MERT_MOD_CTRL, and GEN12_GAMCNTRL_CTRL) that are incorrectly implemented on the 'gt' workaround list and need to be moved accordingly.
Cc: Matt Roper matthew.d.roper@intel.com Signed-off-by: Srinivasan Shanmugam srinivasan.s@intel.com Signed-off-by: Matt Roper matthew.d.roper@intel.com
Reviewed-by: Matt Roper matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0b9435d62808..c014b40d2e9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1343,12 +1343,6 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409757795:xehpsdv */ wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
-	/* Wa_18011725039:xehpsdv */
-	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
-		wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
-		wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
-	}
-
 	/* Wa_16011155590:xehpsdv */
 	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1385,19 +1379,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 			    GAMTLBVEBOX0_CLKGATE_DIS);
 	}

-	/* Wa_14012362059:xehpsdv */
-	wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
-
 	/* Wa_16012725990:xehpsdv */
 	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);

 	/* Wa_14011060649:xehpsdv */
 	wa_14011060649(gt, wal);
-
-	/* Wa_14014368820:xehpsdv */
-	wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
-		    GLOBAL_INVALIDATION_MODE);
 }

 static void
@@ -2617,6 +2604,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		/* Wa_14010449647:xehpsdv */
 		wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
 			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+		/* Wa_18011725039:xehpsdv */
+		if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+			wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+			wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+		}
+
+		/* Wa_14012362059:xehpsdv */
+		wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+		/* Wa_14014368820:xehpsdv */
+		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+			    GLOBAL_INVALIDATION_MODE);
 	}
 }
-- 2.34.1