Hi, Matthew,
On Mon, 2021-10-11 at 16:47 -0700, Matthew Brost wrote:
The hangcheck selftest blocks per engine resets by setting magic bits in the reset flags. This is incorrect for GuC submission because if the GuC fails to reset an engine we would like to do a full GT reset. Do no set these magic bits when using GuC submission.
Side note this lockless algorithm with magic bits to block resets really should be ripped out.
Lockless algorithm aside, from a quick look at the code in intel_reset.c it appears to me like the interface that falls back to a full GT reset is intel_gt_handle_error() whereas intel_engine_reset() is explicitly intended to not do that, so is there a discrepancy between GuC and non-GuC here?
/Thomas
Signed-off-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7e2d99dd012d..90a03c60c80c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -734,7 +734,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_engine_count = i915_reset_engine_count(global, engine); st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + if (!using_guc) + set_bit(I915_RESET_ENGINE + id, >-
reset.flags);
count = 0; do { struct i915_request *rq = NULL; @@ -824,7 +825,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + if (!using_guc) + clear_bit(I915_RESET_ENGINE + id, >-
reset.flags);
st_engine_heartbeat_enable(engine); pr_info("%s: Completed %lu %s resets\n", engine->name, count, active ? "active" : "idle"); @@ -1042,7 +1044,8 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ st_engine_heartbeat_disable_no_pm(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + if (!using_guc) + set_bit(I915_RESET_ENGINE + id, >-
reset.flags);
do { struct i915_request *rq = NULL; struct intel_selftest_saved_policy saved; @@ -1165,7 +1168,8 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + if (!using_guc) + clear_bit(I915_RESET_ENGINE + id, >-
reset.flags);
st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n",