Patches that fix and enhance the selftest_lrc
Akeem G Abodunrin (1): drm/i915/xehpsdv/dg1/tgl: Fix issue with LRI relative addressing
Chris Wilson (7): drm/i915/selftests: Exercise relative mmio paths to non-privileged registers drm/i915/selftests: Exercise cross-process context isolation drm/i915/selftests: Flush the submission for lrc_isolation drm/i915/gt: Explicitly clear BB_OFFSET for new contexts drm/i915/selftests: Check for incomplete LRI from the context image drm/i915/selftest: Clear the output buffers before GPU writes drm/i915/selftest: Always cancel semaphore on error
drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 17 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 499 +++++++++++++++++--- 3 files changed, 452 insertions(+), 65 deletions(-)
From: Chris Wilson chris@chris-wilson.co.uk
Verify that context isolation is also preserved when accessing context-local registers with relative-mmio commands.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 88 ++++++++++++++++++++------ 1 file changed, 67 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 21c29d315cc0..073c9795f42f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -912,7 +912,9 @@ create_user_vma(struct i915_address_space *vm, unsigned long size) }
static struct i915_vma * -store_context(struct intel_context *ce, struct i915_vma *scratch) +store_context(struct intel_context *ce, + struct i915_vma *scratch, + bool relative) { struct i915_vma *batch; u32 dw, x, *cs, *hw; @@ -941,6 +943,9 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; + u32 cmd = MI_STORE_REGISTER_MEM_GEN8; + u32 offset = 0; + u32 mask = ~0;
if (hw[dw] == 0) { dw++; @@ -952,11 +957,19 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) continue; }
+ if (hw[dw] & MI_LRI_LRM_CS_MMIO) { + mask = 0xfff; + if (relative) + cmd |= MI_LRI_LRM_CS_MMIO; + else + offset = ce->engine->mmio_base; + } + dw++; len = (len + 1) / 2; while (len--) { - *cs++ = MI_STORE_REGISTER_MEM_GEN8; - *cs++ = hw[dw]; + *cs++ = cmd; + *cs++ = (hw[dw] & mask) + offset; *cs++ = lower_32_bits(scratch->node.start + x); *cs++ = upper_32_bits(scratch->node.start + x);
@@ -995,6 +1008,7 @@ static struct i915_request * record_registers(struct intel_context *ce, struct i915_vma *before, struct i915_vma *after, + bool relative, u32 *sema) { struct i915_vma *b_before, *b_after; @@ -1002,11 +1016,11 @@ record_registers(struct intel_context *ce, u32 *cs; int err;
- b_before = store_context(ce, before); + b_before = store_context(ce, before, relative); if (IS_ERR(b_before)) return ERR_CAST(b_before);
- b_after = store_context(ce, after); + b_after = store_context(ce, after, relative); if (IS_ERR(b_after)) { rq = ERR_CAST(b_after); goto err_before; @@ -1076,7 +1090,8 @@ record_registers(struct intel_context *ce, goto err_after; }
-static struct i915_vma *load_context(struct intel_context *ce, u32 poison) +static struct i915_vma * +load_context(struct intel_context *ce, u32 poison, bool relative) { struct i915_vma *batch; u32 dw, *cs, *hw; @@ -1103,7 +1118,10 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { + u32 cmd = MI_INSTR(0x22, 0); u32 len = hw[dw] & 0x7f; + u32 offset = 0; + u32 mask = ~0;
if (hw[dw] == 0) { dw++; @@ -1115,11 +1133,19 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) continue; }
+ if (hw[dw] & MI_LRI_LRM_CS_MMIO) { + mask = 0xfff; + if (relative) + cmd |= MI_LRI_LRM_CS_MMIO; + else + offset = ce->engine->mmio_base; + } + dw++; + *cs++ = cmd | len; len = (len + 1) / 2; - *cs++ = MI_LOAD_REGISTER_IMM(len); while (len--) { - *cs++ = hw[dw]; + *cs++ = (hw[dw] & mask) + offset; *cs++ = poison; dw += 2; } @@ -1136,14 +1162,18 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) return batch; }
-static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) +static int +poison_registers(struct intel_context *ce, + u32 poison, + bool relative, + u32 *sema) { struct i915_request *rq; struct i915_vma *batch; u32 *cs; int err;
- batch = load_context(ce, poison); + batch = load_context(ce, poison, relative); if (IS_ERR(batch)) return PTR_ERR(batch);
@@ -1193,7 +1223,7 @@ static int compare_isolation(struct intel_engine_cs *engine, struct i915_vma *ref[2], struct i915_vma *result[2], struct intel_context *ce, - u32 poison) + u32 poison, bool relative) { u32 x, dw, *hw, *lrc; u32 *A[2], *B[2]; @@ -1244,6 +1274,7 @@ static int compare_isolation(struct intel_engine_cs *engine, hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; + bool is_relative = relative;
if (hw[dw] == 0) { dw++; @@ -1255,6 +1286,9 @@ static int compare_isolation(struct intel_engine_cs *engine, continue; }
+ if (!(hw[dw] & MI_LRI_LRM_CS_MMIO)) + is_relative = false; + dw++; len = (len + 1) / 2; while (len--) { @@ -1266,9 +1300,10 @@ static int compare_isolation(struct intel_engine_cs *engine, break;
default: - pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", - engine->name, dw, - hw[dw], hw[dw + 1], + pr_err("%s[%d]: Mismatch for register %4x [using relative? %s], default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", + engine->name, dw, hw[dw], + is_relative ? "Yes" : "No", + hw[dw + 1], A[0][x], B[0][x], B[1][x], poison, lrc[dw + 1]); err = -EINVAL; @@ -1294,7 +1329,8 @@ static int compare_isolation(struct intel_engine_cs *engine, return err; }
-static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) +static int +__lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) { u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); struct i915_vma *ref[2], *result[2]; @@ -1324,7 +1360,7 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) goto err_ref0; }
- rq = record_registers(A, ref[0], ref[1], sema); + rq = record_registers(A, ref[0], ref[1], relative, sema); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ref1; @@ -1352,13 +1388,13 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) goto err_result0; }
- rq = record_registers(A, result[0], result[1], sema); + rq = record_registers(A, result[0], result[1], relative, sema); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_result1; }
- err = poison_registers(B, poison, sema); + err = poison_registers(B, poison, relative, sema); if (err) { WRITE_ONCE(*sema, -1); i915_request_put(rq); @@ -1372,7 +1408,7 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) } i915_request_put(rq);
- err = compare_isolation(engine, ref, result, A, poison); + err = compare_isolation(engine, ref, result, A, poison, relative);
err_result1: i915_vma_put(result[1]); @@ -1434,13 +1470,23 @@ static int live_lrc_isolation(void *arg) for (i = 0; i < ARRAY_SIZE(poison); i++) { int result;
- result = __lrc_isolation(engine, poison[i]); + result = __lrc_isolation(engine, poison[i], false); if (result && !err) err = result;
- result = __lrc_isolation(engine, ~poison[i]); + result = __lrc_isolation(engine, ~poison[i], false); if (result && !err) err = result; + + if (intel_engine_has_relative_mmio(engine)) { + result = __lrc_isolation(engine, poison[i], true); + if (result && !err) + err = result; + + result = __lrc_isolation(engine, ~poison[i], true); + if (result && !err) + err = result; + } } intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) {
From: Chris Wilson chris@chris-wilson.co.uk
Verify that one context running on engine A cannot manipulate another client's context concurrently running on engine B using unprivileged access.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 275 +++++++++++++++++++++---- 1 file changed, 238 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 073c9795f42f..998e561694be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -913,6 +913,7 @@ create_user_vma(struct i915_address_space *vm, unsigned long size)
static struct i915_vma * store_context(struct intel_context *ce, + struct intel_engine_cs *engine, struct i915_vma *scratch, bool relative) { @@ -930,7 +931,7 @@ store_context(struct intel_context *ce, return ERR_CAST(cs); }
- defaults = shmem_pin_map(ce->engine->default_state); + defaults = shmem_pin_map(engine->default_state); if (!defaults) { i915_gem_object_unpin_map(batch->obj); i915_vma_put(batch); @@ -962,7 +963,7 @@ store_context(struct intel_context *ce, if (relative) cmd |= MI_LRI_LRM_CS_MMIO; else - offset = ce->engine->mmio_base; + offset = engine->mmio_base; }
dw++; @@ -981,7 +982,7 @@ store_context(struct intel_context *ce,
*cs++ = MI_BATCH_BUFFER_END;
- shmem_unpin_map(ce->engine->default_state, defaults); + shmem_unpin_map(engine->default_state, defaults);
i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -1004,23 +1005,48 @@ static int move_to_active(struct i915_request *rq, return err; }
+struct hwsp_semaphore { + u32 ggtt; + u32 *va; +}; + +static struct hwsp_semaphore hwsp_semaphore(struct intel_engine_cs *engine) +{ + struct hwsp_semaphore s; + + s.va = memset32(engine->status_page.addr + 1000, 0, 1); + s.ggtt = (i915_ggtt_offset(engine->status_page.vma) + + offset_in_page(s.va)); + + return s; +} + +static u32 *emit_noops(u32 *cs, int count) +{ + while (count--) + *cs++ = MI_NOOP; + + return cs; +} + static struct i915_request * record_registers(struct intel_context *ce, + struct intel_engine_cs *engine, struct i915_vma *before, struct i915_vma *after, bool relative, - u32 *sema) + const struct hwsp_semaphore *sema) { struct i915_vma *b_before, *b_after; struct i915_request *rq; u32 *cs; int err;
- b_before = store_context(ce, before, relative); + b_before = store_context(ce, engine, before, relative); if (IS_ERR(b_before)) return ERR_CAST(b_before);
- b_after = store_context(ce, after, relative); + b_after = store_context(ce, engine, after, relative); if (IS_ERR(b_after)) { rq = ERR_CAST(b_after); goto err_before; @@ -1046,7 +1072,7 @@ record_registers(struct intel_context *ce, if (err) goto err_rq;
- cs = intel_ring_begin(rq, 14); + cs = intel_ring_begin(rq, 18); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_rq; @@ -1057,16 +1083,28 @@ record_registers(struct intel_context *ce, *cs++ = lower_32_bits(b_before->node.start); *cs++ = upper_32_bits(b_before->node.start);
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; - *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(sema); - *cs++ = 0; - *cs++ = MI_NOOP; + if (sema) { + WRITE_ONCE(*sema->va, -1); + + /* Signal the poisoner */ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = sema->ggtt; + *cs++ = 0; + *cs++ = 0; + + /* Then wait for the poison to settle */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = sema->ggtt; + *cs++ = 0; + *cs++ = MI_NOOP; + } else { + cs = emit_noops(cs, 10); + }
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); @@ -1075,7 +1113,6 @@ record_registers(struct intel_context *ce,
intel_ring_advance(rq, cs);
- WRITE_ONCE(*sema, 0); i915_request_get(rq); i915_request_add(rq); err_after: @@ -1091,7 +1128,9 @@ record_registers(struct intel_context *ce, }
static struct i915_vma * -load_context(struct intel_context *ce, u32 poison, bool relative) +load_context(struct intel_context *ce, + struct intel_engine_cs *engine, + u32 poison, bool relative) { struct i915_vma *batch; u32 dw, *cs, *hw; @@ -1107,7 +1146,7 @@ load_context(struct intel_context *ce, u32 poison, bool relative) return ERR_CAST(cs); }
- defaults = shmem_pin_map(ce->engine->default_state); + defaults = shmem_pin_map(engine->default_state); if (!defaults) { i915_gem_object_unpin_map(batch->obj); i915_vma_put(batch); @@ -1138,7 +1177,7 @@ load_context(struct intel_context *ce, u32 poison, bool relative) if (relative) cmd |= MI_LRI_LRM_CS_MMIO; else - offset = ce->engine->mmio_base; + offset = engine->mmio_base; }
dw++; @@ -1154,7 +1193,7 @@ load_context(struct intel_context *ce, u32 poison, bool relative)
*cs++ = MI_BATCH_BUFFER_END;
- shmem_unpin_map(ce->engine->default_state, defaults); + shmem_unpin_map(engine->default_state, defaults);
i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -1164,16 +1203,17 @@ load_context(struct intel_context *ce, u32 poison, bool relative)
static int poison_registers(struct intel_context *ce, + struct intel_engine_cs *engine, u32 poison, bool relative, - u32 *sema) + const struct hwsp_semaphore *sema) { struct i915_request *rq; struct i915_vma *batch; u32 *cs; int err;
- batch = load_context(ce, poison, relative); + batch = load_context(ce, engine, poison, relative); if (IS_ERR(batch)) return PTR_ERR(batch);
@@ -1187,20 +1227,29 @@ poison_registers(struct intel_context *ce, if (err) goto err_rq;
- cs = intel_ring_begin(rq, 8); + cs = intel_ring_begin(rq, 14); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_rq; }
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = sema->ggtt; + *cs++ = 0; + *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); *cs++ = lower_32_bits(batch->node.start); *cs++ = upper_32_bits(batch->node.start);
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(sema); + *cs++ = sema->ggtt; *cs++ = 0; *cs++ = 1;
@@ -1262,7 +1311,7 @@ static int compare_isolation(struct intel_engine_cs *engine, } lrc += LRC_STATE_OFFSET / sizeof(*hw);
- defaults = shmem_pin_map(ce->engine->default_state); + defaults = shmem_pin_map(engine->default_state); if (!defaults) { err = -ENOMEM; goto err_lrc; @@ -1315,7 +1364,7 @@ static int compare_isolation(struct intel_engine_cs *engine, } while (dw < PAGE_SIZE / sizeof(u32) && (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
- shmem_unpin_map(ce->engine->default_state, defaults); + shmem_unpin_map(engine->default_state, defaults); err_lrc: i915_gem_object_unpin_map(ce->state->obj); err_B1: @@ -1332,7 +1381,7 @@ static int compare_isolation(struct intel_engine_cs *engine, static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) { - u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); + struct hwsp_semaphore sema = hwsp_semaphore(engine); struct i915_vma *ref[2], *result[2]; struct intel_context *A, *B; struct i915_request *rq; @@ -1360,15 +1409,12 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) goto err_ref0; }
- rq = record_registers(A, ref[0], ref[1], relative, sema); + rq = record_registers(A, engine, ref[0], ref[1], relative, NULL); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ref1; }
- WRITE_ONCE(*sema, 1); - wmb(); - if (i915_request_wait(rq, 0, HZ / 2) < 0) { i915_request_put(rq); err = -ETIME; @@ -1388,15 +1434,15 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) goto err_result0; }
- rq = record_registers(A, result[0], result[1], relative, sema); + rq = record_registers(A, engine, result[0], result[1], relative, &sema); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_result1; }
- err = poison_registers(B, poison, relative, sema); + err = poison_registers(B, engine, poison, relative, &sema); if (err) { - WRITE_ONCE(*sema, -1); + WRITE_ONCE(*sema.va, -1); i915_request_put(rq); goto err_result1; } @@ -1498,6 +1544,160 @@ static int live_lrc_isolation(void *arg) return err; }
+static int __lrc_cross(struct intel_engine_cs *a, + struct intel_engine_cs *b, + u32 poison) +{ + struct hwsp_semaphore sema = hwsp_semaphore(a); + struct i915_vma *ref[2], *result[2]; + struct intel_context *A, *B; + struct i915_request *rq; + int err; + + GEM_BUG_ON(a->gt->ggtt != b->gt->ggtt); + + pr_debug("Context on %s, poisoning from %s with %08x\n", + a->name, b->name, poison); + + A = intel_context_create(a); + if (IS_ERR(A)) + return PTR_ERR(A); + + B = intel_context_create(b); + if (IS_ERR(B)) { + err = PTR_ERR(B); + goto err_A; + } + + ref[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[0])) { + err = PTR_ERR(ref[0]); + goto err_B; + } + + ref[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[1])) { + err = PTR_ERR(ref[1]); + goto err_ref0; + } + + rq = record_registers(A, a, ref[0], ref[1], false, NULL); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ref1; + } + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_ref1; + } + i915_request_put(rq); + + result[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[0])) { + err = PTR_ERR(result[0]); + goto err_ref1; + } + + result[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[1])) { + err = PTR_ERR(result[1]); + goto err_result0; + } + + rq = record_registers(A, a, result[0], result[1], false, &sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_result1; + } + + err = poison_registers(B, a, poison, false, &sema); + if (err) { + WRITE_ONCE(*sema.va, -1); + i915_request_put(rq); + goto err_result1; + } + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_result1; + } + i915_request_put(rq); + + err = compare_isolation(a, ref, result, A, poison, false); + +err_result1: + i915_vma_put(result[1]); +err_result0: + i915_vma_put(result[0]); +err_ref1: + i915_vma_put(ref[1]); +err_ref0: + i915_vma_put(ref[0]); +err_B: + intel_context_put(B); +err_A: + intel_context_put(A); + return err; +} + 
+static int live_lrc_cross(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *a, *b; + enum intel_engine_id a_id, b_id; + const u32 poison[] = { + STACK_MAGIC, + 0x3a3a3a3a, + 0x5c5c5c5c, + 0xffffffff, + 0xffff0000, + }; + int err = 0; + int i; + + /* + * Our goal is to try and tamper with another client's context + * running concurrently. The HW's goal is to stop us. + */ + + for_each_engine(a, gt, a_id) { + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && + skip_isolation(a)) + continue; + + intel_engine_pm_get(a); + for_each_engine(b, gt, b_id) { + if (a == b) + continue; + + intel_engine_pm_get(b); + for (i = 0; i < ARRAY_SIZE(poison); i++) { + int result; + + result = __lrc_cross(a, b, poison[i]); + if (result && !err) + err = result; + + result = __lrc_cross(a, b, ~poison[i]); + if (result && !err) + err = result; + } + intel_engine_pm_put(b); + } + intel_engine_pm_put(a); + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + return err; +} + static int indirect_ctx_submit_req(struct intel_context *ce) { struct i915_request *rq; @@ -1888,6 +2088,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_isolation), SUBTEST(live_lrc_timestamp), SUBTEST(live_lrc_garbage), + SUBTEST(live_lrc_cross), SUBTEST(live_pphwsp_runtime), SUBTEST(live_lrc_indirect_ctx_bb), };
From: Chris Wilson chris@chris-wilson.co.uk
The lrc_isolation test uses two contexts in order to read from one context while poisoning from a second. The test verifies that the writes of the second context do not leak into the first context. This is done by first recording the register state from context A, forcing a preemption to context B, and only then switching back to context A to re-read the register state to see if anything changed. The sequence is important (and internally controlled by semaphores), but it does require that context A is submitted *before* context B, as context B has higher priority to force the preemption.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 998e561694be..b064e824053f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1416,8 +1416,10 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) }
if (i915_request_wait(rq, 0, HZ / 2) < 0) { + pr_err("%s(%s): wait for reference results timed out\n", + __func__, engine->name); i915_request_put(rq); - err = -ETIME; + err = -EIO; goto err_ref1; } i915_request_put(rq); @@ -1440,6 +1442,17 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) goto err_result1; }
+ /* Wait until we record the register state before allowing preemption */ + if (wait_for_submit(engine, rq, HZ / 5)) { + pr_err("%s(%s): wait for submission timed out\n", + __func__, engine->name); + i915_request_put(rq); + err = -EIO; + goto err_result1; + } + while (READ_ONCE(*sema.va) && !signal_pending(current)) + usleep_range(100, 500); + err = poison_registers(B, engine, poison, relative, &sema); if (err) { WRITE_ONCE(*sema.va, -1); @@ -1448,6 +1461,8 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) }
if (i915_request_wait(rq, 0, HZ / 2) < 0) { + pr_err("%s(%s): wait for results timed out\n", + __func__, engine->name); i915_request_put(rq); err = -ETIME; goto err_result1;
From: Chris Wilson chris@chris-wilson.co.uk
Even though the initial protocontext we load onto HW has the register cleared, by the time we save it into the default image, BB_OFFSET has had the enable bit set. Reclear BB_OFFSET for each new context.
Testcase: igt/i915_selftests/gt_lrc
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 5 +++++ 3 files changed, 23 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 0bf8b45c9319..d6da3bbf66f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -109,6 +109,7 @@ #define RING_SBBSTATE(base) _MMIO((base) + 0x118) /* hsw+ */ #define RING_SBBADDR_UDW(base) _MMIO((base) + 0x11c) /* gen8+ */ #define RING_BBADDR(base) _MMIO((base) + 0x140) +#define RING_BB_OFFSET(base) _MMIO((base) + 0x158) #define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) /* gen8+ */ #define CCID(base) _MMIO((base) + 0x180) #define CCID_EN BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 07bef7128fdb..f673bae97a03 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -662,6 +662,18 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) return -1; }
+static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x80; + else if (GRAPHICS_VER(engine->i915) >= 12) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 9) + return 0x64; + else + return -1; +} + static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -768,6 +780,7 @@ static void init_common_regs(u32 * const regs, bool inhibit) { u32 ctl; + int loc;
ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -779,6 +792,10 @@ static void init_common_regs(u32 * const regs, regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->runtime.last; + + loc = lrc_ring_bb_offset(engine); + if (loc != -1) + regs[loc + 1] = 0; }
static void init_wa_bb_regs(u32 * const regs, diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b064e824053f..2149b2c92793 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -323,6 +323,11 @@ static int live_lrc_fixed(void *arg) lrc_ring_cmd_buf_cctl(engine), "RING_CMD_BUF_CCTL" }, + { + i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)), + lrc_ring_bb_offset(engine), + "RING_BB_OFFSET" + }, { }, }, *t; u32 *hw;
From: Chris Wilson chris@chris-wilson.co.uk
In order to keep the context image parser simple, we assume that all commands follow a similar format. A few, especially non-MI commands on the render engines, have fixed lengths not encoded in a length field. This caused us to incorrectly skip over 3D state commands, and start interpreting context data as instructions. Eventually, as Daniele discovered, this would lead us to find additional LRI as part of the data and mistakenly add invalid LRI commands to the context probes.
Stop parsing after we see the first !MI command, as we know we will have seen all the context registers by that point. (Mostly true for all gen so far, though the render context does have LRI after the first page that we have been ignoring so far. It would be useful to extract those as well so that we have the full list of user accessible registers.)
Similarly, emit a warning if we do try to emit an invalid zero-length LRI.
Reported-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 63 ++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 2149b2c92793..6717ecaed178 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -27,6 +27,9 @@ #define NUM_GPR 16 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
+#define LRI_HEADER MI_INSTR(0x22, 0) +#define LRI_LENGTH_MASK GENMASK(7, 0) + static struct i915_vma *create_scratch(struct intel_gt *gt) { return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); @@ -180,7 +183,7 @@ static int live_lrc_layout(void *arg) continue; }
- if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((lri & GENMASK(31, 23)) != LRI_HEADER) { pr_err("%s: Expected LRI command at dword %d, found %08x\n", engine->name, dw, lri); err = -EINVAL; @@ -948,21 +951,43 @@ store_context(struct intel_context *ce, hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; u32 cmd = MI_STORE_REGISTER_MEM_GEN8; u32 offset = 0; u32 mask = ~0;
+ /* + * Keep it simple, skip parsing complex commands + * + * At present, there are no more MI_LOAD_REGISTER_IMM + * commands after the first 3D state command. Rather + * than include a table (see i915_cmd_parser.c) of all + * the possible commands and their instruction lengths + * (or mask for variable length instructions), assume + * we have gathered the complete list of registers and + * bail out. + */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; + if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { + /* Assume all other MI commands match LRI length mask */ dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + if (hw[dw] & MI_LRI_LRM_CS_MMIO) { mask = 0xfff; if (relative) @@ -1162,21 +1187,32 @@ load_context(struct intel_context *ce, hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 cmd = MI_INSTR(0x22, 0); - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + u32 cmd = LRI_HEADER; u32 offset = 0; u32 mask = ~0;
+ /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; + if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + if (hw[dw] & MI_LRI_LRM_CS_MMIO) { mask = 0xfff; if (relative) @@ -1327,19 +1363,30 @@ static int compare_isolation(struct intel_engine_cs *engine, hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; bool is_relative = relative;
+ /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; + if (hw[dw] == 0) { dw++; continue; }
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; }
+ if (!len) { + pr_err("%s: invalid LRI found in context image\n", + engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + if (!(hw[dw] & MI_LRI_LRM_CS_MMIO)) is_relative = false;
From: Chris Wilson chris@chris-wilson.co.uk
When testing whether we can get the GPU to leak information about non-privileged state, we first need to ensure that the output buffer is set to a known value as the HW may opt to skip the write into memory for a non-privileged read of a sensitive register. We chose POISON_INUSE (0x5a) so that it is both non-zero and distinct from the poison values used during the test.
Reported-by: CQ Tang cq.tang@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 32 ++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6717ecaed178..d8face764ee4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1430,6 +1430,30 @@ static int compare_isolation(struct intel_engine_cs *engine, return err; }
+static struct i915_vma * +create_result_vma(struct i915_address_space *vm, unsigned long sz) +{ + struct i915_vma *vma; + void *ptr; + + vma = create_user_vma(vm, sz); + if (IS_ERR(vma)) + return vma; + + /* Set the results to a known value distinct from the poison */ + ptr = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(ptr)) { + i915_vma_put(vma); + return ERR_CAST(ptr); + } + + memset(ptr, POISON_INUSE, vma->size); + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return vma; +} + static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) { @@ -1449,13 +1473,13 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) goto err_A; }
- ref[0] = create_user_vma(A->vm, SZ_64K); + ref[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[0])) { err = PTR_ERR(ref[0]); goto err_B; }
- ref[1] = create_user_vma(A->vm, SZ_64K); + ref[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[1])) { err = PTR_ERR(ref[1]); goto err_ref0; @@ -1476,13 +1500,13 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) } i915_request_put(rq);
- result[0] = create_user_vma(A->vm, SZ_64K); + result[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[0])) { err = PTR_ERR(result[0]); goto err_ref1; }
- result[1] = create_user_vma(A->vm, SZ_64K); + result[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[1])) { err = PTR_ERR(result[1]); goto err_result0;
From: Chris Wilson chris@chris-wilson.co.uk
Ensure that we always signal the semaphore when timing out, so that if the request happens to be stuck waiting on the semaphore we recover quickly, without having to wait for a full GPU reset.
Reported-by: CQ Tang cq.tang@intel.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d8face764ee4..63fd508fea49 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1530,20 +1530,17 @@ __lrc_isolation(struct intel_engine_cs *engine, u32 poison, bool relative) usleep_range(100, 500);
err = poison_registers(B, engine, poison, relative, &sema); - if (err) { - WRITE_ONCE(*sema.va, -1); - i915_request_put(rq); - goto err_result1; - } - - if (i915_request_wait(rq, 0, HZ / 2) < 0) { + if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) { pr_err("%s(%s): wait for results timed out\n", __func__, engine->name); - i915_request_put(rq); err = -ETIME; - goto err_result1; } + + /* Always cancel the semaphore wait, just in case the GPU gets stuck */ + WRITE_ONCE(*sema.va, -1); i915_request_put(rq); + if (err) + goto err_result1;
err = compare_isolation(engine, ref, result, A, poison, relative);
From: Akeem G Abodunrin akeem.g.abodunrin@intel.com
When bit 19 of the MI_LOAD_REGISTER_IMM instruction opcode is set on tgl+ devices, HW does not care about certain register address offsets, but instead checks the following for valid address ranges on specific engines: RCS && CCS: BITS(0 - 10) BCS: BITS(0 - 11) VECS && VCS: BITS(0 - 13) Also, tgl+ now supports relative addressing for the BCS engine - So, this patch fixes an issue with the live_gt_lrc selftest that is failing where there is a mismatch between the LRC register layout generated during init and the HW default register offsets.
Bspec: 45728
Cc: Kumar Valsan, Prathap prathap.kumar.valsan@intel.com Signed-off-by: Akeem G Abodunrin akeem.g.abodunrin@intel.com Signed-off-by: Ramalingam C ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 36 +++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 63fd508fea49..5b2a205ab372 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -131,6 +131,27 @@ static int context_flush(struct intel_context *ce, long timeout) return err; }
+static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) +{ + if ((lri & MI_LRI_LRM_CS_MMIO) == 0) + return ~0u; + + if (GRAPHICS_VER(engine->i915) < 12) + return 0xfff; + + switch (engine->class) { + default: + case RENDER_CLASS: + case COMPUTE_CLASS: + return 0x07ff; + case COPY_ENGINE_CLASS: + return 0x0fff; + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + return 0x3fff; + } +} + static int live_lrc_layout(void *arg) { struct intel_gt *gt = arg; @@ -170,6 +191,7 @@ static int live_lrc_layout(void *arg) dw = 0; do { u32 lri = READ_ONCE(hw[dw]); + u32 lri_mask;
if (lri == 0) { dw++; @@ -197,6 +219,18 @@ static int live_lrc_layout(void *arg) break; }
+ /* + * When bit 19 of MI_LOAD_REGISTER_IMM instruction + * opcode is set on Gen12+ devices, HW does not + * care about certain register address offsets, and + * instead check the following for valid address + * ranges on specific engines: + * RCS && CCS: BITS(0 - 10) + * BCS: BITS(0 - 11) + * VECS && VCS: BITS(0 - 13) + */ + lri_mask = get_lri_mask(engine, lri); + lri &= 0x7f; lri++; dw++; @@ -204,7 +238,7 @@ static int live_lrc_layout(void *arg) while (lri) { u32 offset = READ_ONCE(hw[dw]);
- if (offset != lrc[dw]) { + if ((offset ^ lrc[dw]) & lri_mask) { pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", engine->name, dw, offset, lrc[dw]); err = -EINVAL;
dri-devel@lists.freedesktop.org