Re: [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

30 Jun 2021

On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
...
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
thomas.hellstrom@linux.intel.com wrote:
...
In order to make the code a bit more readable and to facilitate
async memcpy moves, reorganize the move code a little. Determine
at an early stage whether to copy or to clear.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-------
1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index c39d982c4fa6..4e529adcdfc7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
drm_i915_gem_object *obj,
 }
static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
+                              bool clear,
                               struct ttm_resource *dst_mem,
                               struct sg_table *dst_st)
 {
@@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
ttm_buffer_object *bo,
                return -EINVAL;
dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
-       if (!ttm || !ttm_tt_is_populated(ttm)) {
+       if (clear) {
                if (bo->type == ttm_bo_type_kernel)
                        return -EINVAL;
Was that meant to be:
return 0:
?
Also does that mean we are incorrectly falling back to memset, for
non-userspace objects, instead of making it a noop?
No, we're deliberately falling back to memset for non-userspace
objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
everything is implemented correctly.
...
...
-               if (ttm && !(ttm->page_flags &
TTM_PAGE_FLAG_ZERO_ALLOC))
-                       return 0;



intel_engine_pm_get(i915->gt.migrate.context-
...
engine);
ret = intel_context_migrate_clear(i915-
...
gt.migrate.context, NULL,
dst_st->sgl,
dst_level,
@@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
ttm_buffer_object *bo,
        return ret;
 }
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
-                        struct ttm_operation_ctx *ctx,
-                        struct ttm_resource *dst_mem,
-                        struct ttm_place *hop)
+static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
clear,
+                           struct ttm_resource *dst_mem,
+                           struct sg_table *dst_st)
 {
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       struct ttm_resource_manager *dst_man =
-               ttm_manager_type(bo->bdev, dst_mem->mem_type);
        struct intel_memory_region *dst_reg, *src_reg;
        union {
                struct ttm_kmap_iter_tt tt;
                struct ttm_kmap_iter_iomap io;
        } _dst_iter, _src_iter;
        struct ttm_kmap_iter *dst_iter, *src_iter;
-       struct sg_table *dst_st;
        int ret;
dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
        src_reg = i915_ttm_region(bo->bdev, bo->resource-
...
mem_type);
GEM_BUG_ON(!dst_reg || !src_reg);
+       ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
+       if (ret) {
One future consideration is flat CCS where I don't think we can
easily
fall back to memcpy for userspace objects. Maybe we can make this
fallback conditional on DG1 or !ALLOC_USER for now, or just add a
TODO?
Ugh. Is that true for both clearing and copying, or is it only copying?
Problem is if we hit an engine reset and fence error during initial
clearing / swapin, the plan moving forward is to intercept that and
resort to cpu clearing / copying for security reasons. In the worst
case we at least need to be able to clear.
/Thomas
...
...
+               dst_iter = !cpu_maps_iomem(dst_mem) ?
+                       ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
...
ttm) :
+                       ttm_kmap_iter_iomap_init(&_dst_iter.io,
&dst_reg->iomap,
+                                                dst_st, dst_reg-
...
region.start);



+               src_iter = !cpu_maps_iomem(bo->resource) ?
+                       ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
...
ttm) :
+                       ttm_kmap_iter_iomap_init(&_src_iter.io,
&src_reg->iomap,
+                                                obj-
...
ttm.cached_io_st,
+                                                src_reg-
...
region.start);



+               ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
src_iter);
+       }
+}



+static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+                        struct ttm_operation_ctx *ctx,
+                        struct ttm_resource *dst_mem,
+                        struct ttm_place *hop)
+{
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       struct ttm_resource_manager *dst_man =
+               ttm_manager_type(bo->bdev, dst_mem->mem_type);
+       struct ttm_tt *ttm = bo->ttm;
+       struct sg_table *dst_st;
+       bool clear;
+       int ret;



/* Sync for now. We could do the actual copy async. */
        ret = ttm_bo_wait_ctx(bo, ctx);
        if (ret)
@@ -526,9 +550,8 @@ static int i915_ttm_move(struct
ttm_buffer_object *bo, bool evict,
        }
/* Populate ttm with pages if needed. Typically system
memory. */
-       if (bo->ttm && (dst_man->use_tt ||
-                       (bo->ttm->page_flags &
TTM_PAGE_FLAG_SWAPPED))) {
-               ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
+       if (ttm && (dst_man->use_tt || (ttm->page_flags &
TTM_PAGE_FLAG_SWAPPED))) {
+               ret = ttm_tt_populate(bo->bdev, ttm, ctx);
                if (ret)
                        return ret;
        }
@@ -537,23 +560,10 @@ static int i915_ttm_move(struct
ttm_buffer_object *bo, bool evict,
        if (IS_ERR(dst_st))
                return PTR_ERR(dst_st);
-       ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
-       if (ret) {
-               /* If we start mapping GGTT, we can no longer use
man::use_tt here. */
-               dst_iter = !cpu_maps_iomem(dst_mem) ?
-                       ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
...
ttm) :
-                       ttm_kmap_iter_iomap_init(&_dst_iter.io,
&dst_reg->iomap,
-                                                dst_st, dst_reg-
...
region.start);



-               src_iter = !cpu_maps_iomem(bo->resource) ?
-                       ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
...
ttm) :
-                       ttm_kmap_iter_iomap_init(&_src_iter.io,
&src_reg->iomap,
-                                                obj-
...
ttm.cached_io_st,
-                                                src_reg-
...
region.start);
+       clear = !cpu_maps_iomem(bo->resource) && (!ttm ||
!ttm_tt_is_populated(ttm));
+       if (!(clear && ttm && !(ttm->page_flags &
TTM_PAGE_FLAG_ZERO_ALLOC)))
Seems quite hard to read?
Reviewed-by: Matthew Auld matthew.auld@intel.com
...
+               __i915_ttm_move(bo, clear, dst_mem, dst_st);
-               ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
src_iter);
-       }
-       /* Below dst_mem becomes bo->resource. */
        ttm_bo_move_sync_cleanup(bo, dst_mem);
        i915_ttm_adjust_domains_after_move(obj);
        i915_ttm_free_cached_io_st(obj);
--
2.31.1

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com

drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-------