On Thu, Aug 26, 2021 at 12:45:14PM +0200, Thomas Hellström wrote:
Pinned contexts, like the migrate contexts need reset after resume since their context image may have been lost. Also the GuC needs to register pinned contexts.
Add a list to struct intel_engine_cs where we add all pinned contexts on creation, and traverse that list at resume time to reset the pinned contexts.
This fixes the kms_pipe_crc_basic@suspend-read-crc-pipe-a selftest for now, but proper LMEM backup / restore is needed for full suspend functionality. However, note that even with full LMEM backup / restore it may be desirable to keep the reset since backing up the migrate context images must happen using memcpy() after the migrate context has become inactive, and for performance- and other reasons we want to avoid memcpy() from LMEM.
Also traverse the list at guc_init_lrc_mapping() calling guc_kernel_context_pin() for the pinned contexts, like is already done for the kernel context.
v2:
- Don't reset the contexts on each __engine_unpark() but rather at resume time (Chris Wilson).
Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Brost Matthew matthew.brost@intel.com Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
I guess it got lost, but I few weeks ago I stumbled over this and wondered why we're even setting up a separate context or at least why a separate vm compared to the gt->vm we have already?
Even on chips with bazillions of copy engines the plan is that we only reserve a single one for kernel migrations, so there's not really a need for quite this much generality I think. Maybe check with Jon Bloomfield on this.
Iirc I had also a few other questions on simplifying this area. -Daniel
drivers/gpu/drm/i915/gt/intel_context_types.h | 8 +++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 23 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 3 +++ drivers/gpu/drm/i915/gt/mock_engine.c | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 +++++--- 8 files changed, 55 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..a63631ea0ec4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -152,6 +152,14 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu;
/**
* pinned_contexts_link: List link for the engine's pinned contexts.
* This is only used if this is a perma-pinned kernel context and
* the list is assumed to only be manipulated during driver load
* or unload time so no mutex protection currently.
*/
struct list_head pinned_contexts_link;
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 332efea696a5..c606a4714904 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
- INIT_LIST_HEAD(&engine->pinned_contexts_list); engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id);
@@ -875,6 +876,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, return ERR_PTR(err); }
- list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
- /*
- Give our perma-pinned kernel timelines a separate lockdep class,
- so that we can use them from within the normal user timelines
@@ -897,6 +900,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce) list_del(&ce->timeline->engine_link); mutex_unlock(&hwsp->vm->mutex);
- list_del(&ce->pinned_contexts_link); intel_context_unpin(ce); intel_context_put(ce);
} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 1f07ac4e0672..dacd62773735 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_engine_init_heartbeat(engine); }
+/**
- intel_engine_reset_pinned_contexts - Reset the pinned contexts of
- an engine.
- @engine: The engine whose pinned contexts we want to reset.
- Typically the pinned context LMEM images lose or get their content
- corrupted on suspend. This function resets their images.
- */
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine) +{
- struct intel_context *ce;
- list_for_each_entry(ce, &engine->pinned_contexts_list,
pinned_contexts_link) {
/* kernel context gets reset at __engine_unpark() */
if (ce == engine->kernel_context)
continue;
dbg_poison_ce(ce);
ce->ops->reset(ce);
- }
+}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_engine_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 70ea46d6cfb0..8520c595f5e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine)
void intel_engine_init__pm(struct intel_engine_cs *engine);
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index bfbfe53c23dd..5ae1207c363b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -307,6 +307,13 @@ struct intel_engine_cs {
struct intel_context *kernel_context; /* pinned */
/**
* pinned_contexts_list: List of pinned contexts. This list is only
* assumed to be manipulated during driver load- or unload time and
* does therefore not have any additional protection.
*/
struct list_head pinned_contexts_list;
intel_engine_mask_t saturated; /* submitting semaphores too late? */
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index dea8e2479897..c9bae2ef92df 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -192,6 +192,9 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
intel_rps_sanitize(>->rps);
- for_each_engine(engine, gt, id)
intel_engine_reset_pinned_contexts(engine);
- intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(gt->uncore->rpm, wakeref);
} diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2c1af030310c..8a14982a9691 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -376,6 +376,7 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce;
- INIT_LIST_HEAD(&engine->pinned_contexts_list); engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); if (!engine->sched_engine) return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 87d8dc8f51b9..55709206b95e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2385,9 +2385,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) * and even it did this code would be run again. */
- for_each_engine(engine, gt, id)
if (engine->kernel_context)
guc_kernel_context_pin(guc, engine->kernel_context);
- for_each_engine(engine, gt, id) {
struct intel_context *ce;
list_for_each_entry(ce, &engine->pinned_contexts_list,
pinned_contexts_link)
guc_kernel_context_pin(guc, ce);
- }
}
static void guc_release(struct intel_engine_cs *engine)
2.31.1