On 14/09/2021 20:31, Thomas Hellström wrote:
Just evict unpinned objects to system. For pinned LMEM objects, make a backup system object and blit the contents to that.
Backup is performed in three steps: 1) Opportunistically evict evictable objects using the gpu blitter. 2) After gt idle, evict evictable objects using the gpu blitter. This will be modified in an upcoming patch to back up pinned objects that are not used by the blitter itself. 3) Back up remaining pinned objects using memcpy.
Also move uC suspend to after 2) to make sure we have a functional GuC during 2) if using GuC submission.
v2:
- Major refactor to make sure gem_exec_suspend@hang-SX subtests work, and suspend / resume works with a slightly modified GuC submission enabling patch series.
v3:
- Fix a potential use-after-free (Matthew Auld)
- Use i915_gem_object_create_shmem() instead of i915_gem_object_create_region (Matthew Auld)
- Minor simplifications (Matthew Auld)
- Fix up kerneldoc for i915_ttm_restore_region().
- Final lmem_suspend() call moved to i915_gem_backup_suspend() from i915_gem_suspend_late(), since the latter gets called at driver unload and we don't want to run it unnecessarily at that time.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 92 +++++++- drivers/gpu/drm/i915/gem/i915_gem_pm.h | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 ++- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 10 + drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 203 ++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h | 24 +++ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 10 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- 11 files changed, 362 insertions(+), 17 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9d371be7dc5c..f9b69492a56c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -154,6 +154,7 @@ gem-y += \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ gem/i915_gem_ttm.o \
- gem/i915_gem_ttm_pm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ gem/i915_gemfs.o
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2471f36aaff3..734cc8e16481 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -534,6 +534,7 @@ struct drm_i915_gem_object { struct { struct sg_table *cached_io_st; struct i915_gem_object_page_iter get_io_page;
bool created:1; } ttm;struct drm_i915_gem_object *backup;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 8b9d7d14c4bd..8736ae1dfbb2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,6 +5,7 @@ */
#include "gem/i915_gem_pm.h" +#include "gem/i915_gem_ttm_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -39,7 +40,86 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); }
-void i915_gem_suspend_late(struct drm_i915_private *i915) +static int lmem_restore(struct drm_i915_private *i915, bool allow_gpu) +{
- struct intel_memory_region *mr;
- int ret = 0, id;
- for_each_memory_region(mr, i915, id) {
if (mr->type == INTEL_MEMORY_LOCAL) {
ret = i915_ttm_restore_region(mr, allow_gpu);
if (ret)
break;
}
- }
- return ret;
+}
+static int lmem_suspend(struct drm_i915_private *i915, bool allow_gpu,
bool backup_pinned)
+{
- struct intel_memory_region *mr;
- int ret = 0, id;
- for_each_memory_region(mr, i915, id) {
if (mr->type == INTEL_MEMORY_LOCAL) {
ret = i915_ttm_backup_region(mr, allow_gpu, backup_pinned);
if (ret)
break;
}
- }
- return ret;
+}
+static void lmem_recover(struct drm_i915_private *i915) +{
- struct intel_memory_region *mr;
- int id;
- for_each_memory_region(mr, i915, id)
if (mr->type == INTEL_MEMORY_LOCAL)
i915_ttm_recover_region(mr);
+}
+int i915_gem_backup_suspend(struct drm_i915_private *i915) +{
- int ret;
- /* Opportunistically try to evict unpinned objects */
- ret = lmem_suspend(i915, true, false);
- if (ret)
goto out_recover;
- i915_gem_suspend(i915);
- /*
* More objects may have become unpinned as requests were
* retired. Now try to evict again. The gt may be wedged here
* in which case we automatically fall back to memcpy.
*/
- ret = lmem_suspend(i915, true, false);
- if (ret)
goto out_recover;
- /*
* Remaining objects are backed up using memcpy once we've stopped
* using the migrate context.
*/
- ret = lmem_suspend(i915, false, true);
- if (ret)
goto out_recover;
- return 0;
+out_recover:
- lmem_recover(i915);
- return ret;
+}
+int i915_gem_suspend_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; struct list_head *phases[] = { @@ -83,6 +163,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); if (flush) wbinvd_on_all_cpus();
- return 0;
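Can we drop this change (the int return from i915_gem_suspend_late()) now that the final lmem_suspend() call has moved to i915_gem_backup_suspend()?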
I guess the only slight concern is all the GEM_WARN_ON() calls used instead of proper error handling in some places, but hopefully these should never be hit in practice. Reviewed-by: Matthew Auld matthew.auld@intel.com