On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström <thomas.hellstrom@linux.intel.com> wrote:
In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-------
1 file changed, 40 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c39d982c4fa6..4e529adcdfc7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, }
static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL;
dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL;
Was that meant to be `return 0;`?
Also does that mean we are incorrectly falling back to memset, for non-userspace objects, instead of making it a noop?
No, we're deliberately falling back to memset for non-userspace objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if everything is implemented correctly.
- if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0;
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
dst_st->sgl, dst_level, @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; }
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *dst_mem, - struct ttm_place *hop) +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); struct intel_memory_region *dst_reg, *src_reg; union { struct ttm_kmap_iter_tt tt; struct ttm_kmap_iter_iomap io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; - struct sg_table *dst_st; int ret;
dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
GEM_BUG_ON(!dst_reg || !src_reg);
+ ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) {
One future consideration is flat CCS where I don't think we can easily fall back to memcpy for userspace objects. Maybe we can make this fallback conditional on DG1 or !ALLOC_USER for now, or just add a TODO?
Ugh. Is that true for both clearing and copying, or is it only copying?
The problem is that if we hit an engine reset and fence error during initial clearing / swap-in, the plan moving forward is to intercept that and resort to CPU clearing / copying for security reasons. In the worst case we at least need to be able to clear.
/Thomas
+ dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start);
+ src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start);
+ ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } +}
+static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct ttm_tt *ttm = bo->ttm; + struct sg_table *dst_st; + bool clear; + int ret;
/* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, }
/* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -537,23 +560,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st);
- ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
- ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start);
- src_iter = !cpu_maps_iomem(bo->resource) ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
- ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start);
+ clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)))
Seems quite hard to read?
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
+ __i915_ttm_move(bo, clear, dst_mem, dst_st);
- ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - } - /* Below dst_mem becomes bo->resource. */ ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); -- 2.31.1