Few bug fixes for lrc selftest.
v4: Gen8 don't have per engine recording of BB_OFFSET [Chris]
Chris Wilson (4): drm/i915/gt: Explicitly clear BB_OFFSET for new contexts drm/i915/selftests: Check for incomplete LRI from the context image drm/i915/selftest: Always cancel semaphore on error drm/i915/selftest: Clear the output buffers before GPU writes
drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 115 ++++++++++++++++---- 3 files changed, 116 insertions(+), 20 deletions(-)
From: Chris Wilson chris@chris-wilson.co.uk
Even though the initial protocontext we load onto HW has the register cleared, by the time we save it into the default image, BB_OFFSET has had the enable bit set. Reclear BB_OFFSET for each new context.
Testcase: igt/i915_selftests/gt_lrc
v2: Extend it for gen8. v3: BB_OFFSET is recorded per engine from Gen9 onwards
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com Reviewed-by: Thomas Hellstrom thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 5 +++++ 3 files changed, 26 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 75a0c55c5aa5..8c65f3a7acfb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -109,6 +109,7 @@ #define RING_SBBSTATE(base) _MMIO((base) + 0x118) /* hsw+ */ #define RING_SBBADDR_UDW(base) _MMIO((base) + 0x11c) /* gen8+ */ #define RING_BBADDR(base) _MMIO((base) + 0x140) +#define RING_BB_OFFSET(base) _MMIO((base) + 0x158) #define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) /* gen8+ */ #define CCID(base) _MMIO((base) + 0x180) #define CCID_EN BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eec73c66406c..ee8ab7470a62 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -662,6 +662,21 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) return -1; }
+static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x80; + else if (GRAPHICS_VER(engine->i915) >= 12) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 9) + return 0x64; + else if (GRAPHICS_VER(engine->i915) >= 8 && + engine->class == RENDER_CLASS) + return 0xc4; + else + return -1; +} + static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -768,6 +783,7 @@ static void init_common_regs(u32 * const regs, bool inhibit) { u32 ctl; + int loc;
ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -779,6 +795,10 @@ static void init_common_regs(u32 * const regs, regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->stats.runtime.last; + + loc = lrc_ring_bb_offset(engine); + if (loc != -1) + regs[loc + 1] = 0; }
static void init_wa_bb_regs(u32 * const regs, diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..c4bd4e1ac5ef 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -357,6 +357,11 @@ static int live_lrc_fixed(void *arg) lrc_ring_cmd_buf_cctl(engine), "RING_CMD_BUF_CCTL" }, + { + i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)), + lrc_ring_bb_offset(engine), + "RING_BB_OFFSET" + }, { }, }, *t; u32 *hw;
On 2022-05-02 at 16:40:00 +0530, Ramalingam C wrote:
From: Chris Wilson chris@chris-wilson.co.uk
Even though the initial protocontext we load onto HW has the register cleared, by the time we save it into the default image, BB_OFFSET has had the enable bit set. Reclear BB_OFFSET for each new context.
Testcase: igt/i915_selftests/gt_lrc
v2: Extend it for gen8. v3: BB_OFFSET is recorded per engine from Gen9 onwards
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com Reviewed-by: Thomas Hellstrom thomas.hellstrom@linux.intel.com
Thomas,
Could you please reconfirm your R-b for v3? This R-b was given for v1.
Ram
drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 5 +++++ 3 files changed, 26 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 75a0c55c5aa5..8c65f3a7acfb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -109,6 +109,7 @@ #define RING_SBBSTATE(base) _MMIO((base) + 0x118) /* hsw+ */ #define RING_SBBADDR_UDW(base) _MMIO((base) + 0x11c) /* gen8+ */ #define RING_BBADDR(base) _MMIO((base) + 0x140) +#define RING_BB_OFFSET(base) _MMIO((base) + 0x158) #define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) /* gen8+ */ #define CCID(base) _MMIO((base) + 0x180) #define CCID_EN BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eec73c66406c..ee8ab7470a62 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -662,6 +662,21 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) return -1; }
+static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) +{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return 0x80;
- else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x70;
- else if (GRAPHICS_VER(engine->i915) >= 9)
return 0x64;
- else if (GRAPHICS_VER(engine->i915) >= 8 &&
engine->class == RENDER_CLASS)
return 0xc4;
- else
return -1;
+}
static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -768,6 +783,7 @@ static void init_common_regs(u32 * const regs, bool inhibit) { u32 ctl;
int loc;
ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
@@ -779,6 +795,10 @@ static void init_common_regs(u32 * const regs, regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
- loc = lrc_ring_bb_offset(engine);
- if (loc != -1)
regs[loc + 1] = 0;
}
static void init_wa_bb_regs(u32 * const regs, diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..c4bd4e1ac5ef 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -357,6 +357,11 @@ static int live_lrc_fixed(void *arg) lrc_ring_cmd_buf_cctl(engine), "RING_CMD_BUF_CCTL" },
{
i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
lrc_ring_bb_offset(engine),
"RING_BB_OFFSET"
}, *t; u32 *hw;}, { },
-- 2.20.1
From: Chris Wilson chris@chris-wilson.co.uk
In order to keep the context image parser simple, we assume that all commands follow a similar format. A few, especially not MI commands on the render engines, have fixed lengths not encoded in a length field. This caused us to incorrectly skip over 3D state commands, and start interpreting context data as instructions. Eventually, as Daniele discovered, this would lead us to find addition LRI as part of the data and mistakenly add invalid LRI commands to the context probes.
Stop parsing after we see the first !MI command, as we know we will have seen all the context registers by that point. (Mostly true for all gen so far, though the render context does have LRI after the first page that we have been ignoring so far. It would be useful to extract those as well so that we have the full list of user accessible registers.)
Similarly, emit a warning if we do try to emit an invalid zero-length LRI.
Reported-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com Acked-by: Thomas Hellstrom thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 61 +++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index c4bd4e1ac5ef..3271f01fe7db 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -27,6 +27,9 @@ #define NUM_GPR 16 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
+#define LRI_HEADER MI_INSTR(0x22, 0) +#define LRI_LENGTH_MASK GENMASK(7, 0) + static struct i915_vma *create_scratch(struct intel_gt *gt) { return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); @@ -202,7 +205,7 @@ static int live_lrc_layout(void *arg) continue; }
- if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((lri & GENMASK(31, 23)) != LRI_HEADER) { pr_err("%s: Expected LRI command at dword %d, found %08x\n", engine->name, dw, lri); err = -EINVAL; @@ -992,18 +995,40 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* + * Keep it simple, skip parsing complex commands + * + * At present, there are no more MI_LOAD_REGISTER_IMM + * commands after the first 3D state command. Rather + * than include a table (see i915_cmd_parser.c) of all + * the possible commands and their instruction lengths + * (or mask for variable length instructions), assume + * we have gathered the complete list of registers and + * bail out. + */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break;
if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { + /* Assume all other MI commands match LRI length mask */ dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + ce->engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; while (len--) { @@ -1155,18 +1180,29 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break;
if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + ce->engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; *cs++ = MI_LOAD_REGISTER_IMM(len); @@ -1297,18 +1333,29 @@ static int compare_isolation(struct intel_engine_cs *engine, hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break;
if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; while (len--) {
From: Chris Wilson chris@chris-wilson.co.uk
Ensure that we always signal the semaphore when timing out, so that if it happens to be stuck waiting for the semaphore we will quickly recover without having to wait for a reset.
Reported-by: CQ Tang cq.tang@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: CQ Tang cq.tang@intel.com cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com Reviewed-by: Thomas Hellstrom thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3271f01fe7db..e4d5d74489bf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1460,18 +1460,17 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) }
err = poison_registers(B, poison, sema); - if (err) { - WRITE_ONCE(*sema, -1); - i915_request_put(rq); - goto err_result1; - } - - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - i915_request_put(rq); + if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) { + pr_err("%s(%s): wait for results timed out\n", + __func__, engine->name); err = -ETIME; - goto err_result1; } + + /* Always cancel the semaphore wait, just in case the GPU gets stuck */ + WRITE_ONCE(*sema, -1); i915_request_put(rq); + if (err) + goto err_result1;
err = compare_isolation(engine, ref, result, A, poison);
From: Chris Wilson chris@chris-wilson.co.uk
When testing whether we can get the GPU to leak information about non-privileged state, we first need to ensure that the output buffer is set to a known value as the HW may opt to skip the write into memory for a non-privileged read of a sensitive register. We chose POISON_INUSE (0x5a) so that is both non-zero and distinct from the poison values used during the test.
v2: Use i915_gem_object_pin_map_unlocked
Reported-by: CQ Tang cq.tang@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: CQ Tang cq.tang@intel.com cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com Reviewed-by: Thomas Hellstrom thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 32 ++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e4d5d74489bf..d04d08d9d92e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1395,6 +1395,30 @@ static int compare_isolation(struct intel_engine_cs *engine, return err; }
+static struct i915_vma * +create_result_vma(struct i915_address_space *vm, unsigned long sz) +{ + struct i915_vma *vma; + void *ptr; + + vma = create_user_vma(vm, sz); + if (IS_ERR(vma)) + return vma; + + /* Set the results to a known value distinct from the poison */ + ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC); + if (IS_ERR(ptr)) { + i915_vma_put(vma); + return ERR_CAST(ptr); + } + + memset(ptr, POISON_INUSE, vma->size); + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return vma; +} + static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) { u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); @@ -1413,13 +1437,13 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) goto err_A; }
- ref[0] = create_user_vma(A->vm, SZ_64K); + ref[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[0])) { err = PTR_ERR(ref[0]); goto err_B; }
- ref[1] = create_user_vma(A->vm, SZ_64K); + ref[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[1])) { err = PTR_ERR(ref[1]); goto err_ref0; @@ -1441,13 +1465,13 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) } i915_request_put(rq);
- result[0] = create_user_vma(A->vm, SZ_64K); + result[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[0])) { err = PTR_ERR(result[0]); goto err_ref1; }
- result[1] = create_user_vma(A->vm, SZ_64K); + result[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[1])) { err = PTR_ERR(result[1]); goto err_result0;
dri-devel@lists.freedesktop.org