The ttm_move_memcpy() function was intended to be usable async under a fence. We are going to use it as a fallback if the GPU clearing blit fails before we set up CPU or GPU PTEs to the memory region. To accomplish that, the bo argument to ttm_move_memcpy() needs to be replaced.
Patch 1 reorganizes the i915 ttm move code a bit to make the change in patch 2 smaller. Patch 2 updates the ttm_move_memcpy() interface.
Thomas Hellström (2):
  drm/i915/ttm: Reorganize the ttm move code somewhat
  drm/ttm, drm/i915: Update ttm_move_memcpy for async use
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-----------
 drivers/gpu/drm/ttm/ttm_bo_util.c       | 20 +++----
 include/drm/ttm/ttm_bo_driver.h         |  2 +-
 3 files changed, 51 insertions(+), 41 deletions(-)
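To make the intent concrete, here is a minimal sketch of how the memcpy fallback could run async behind a dma-fence once patch 2 is in place. The struct and function names below are hypothetical, not part of this series; only ttm_move_memcpy(), the workqueue API and dma_fence_signal() are real interfaces:

	#include <linux/workqueue.h>
	#include <linux/dma-fence.h>
	#include <drm/ttm/ttm_bo_driver.h>

	/* Hypothetical helper: run the memcpy fallback from a worker and
	 * signal a pre-published dma_fence when the data is in place. */
	struct memcpy_work {
		struct work_struct work;
		struct dma_fence *fence;	/* fence published to waiters */
		bool clear;
		u32 num_pages;
		struct ttm_kmap_iter *dst_iter, *src_iter;
	};

	static void memcpy_work_fn(struct work_struct *work)
	{
		struct memcpy_work *mw =
			container_of(work, struct memcpy_work, work);

		/* All inputs were captured before the bo could change,
		 * which is exactly why the bo argument has to go. */
		ttm_move_memcpy(mw->clear, mw->num_pages,
				mw->dst_iter, mw->src_iter);
		dma_fence_signal(mw->fence);
	}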
In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-----------
 1 file changed, 40 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index c39d982c4fa6..4e529adcdfc7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 }

 static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
+			       bool clear,
 			       struct ttm_resource *dst_mem,
 			       struct sg_table *dst_st)
 {
@@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 		return -EINVAL;

 	dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
-	if (!ttm || !ttm_tt_is_populated(ttm)) {
+	if (clear) {
 		if (bo->type == ttm_bo_type_kernel)
 			return -EINVAL;

-		if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
-			return 0;
-
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
 						  dst_st->sgl, dst_level,
@@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 	return ret;
 }

-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
-			 struct ttm_operation_ctx *ctx,
-			 struct ttm_resource *dst_mem,
-			 struct ttm_place *hop)
+static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+			    struct ttm_resource *dst_mem,
+			    struct sg_table *dst_st)
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-	struct ttm_resource_manager *dst_man =
-		ttm_manager_type(bo->bdev, dst_mem->mem_type);
 	struct intel_memory_region *dst_reg, *src_reg;
 	union {
 		struct ttm_kmap_iter_tt tt;
 		struct ttm_kmap_iter_iomap io;
 	} _dst_iter, _src_iter;
 	struct ttm_kmap_iter *dst_iter, *src_iter;
-	struct sg_table *dst_st;
 	int ret;

 	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
 	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
 	GEM_BUG_ON(!dst_reg || !src_reg);

+	ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
+	if (ret) {
+		dst_iter = !cpu_maps_iomem(dst_mem) ?
+			ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
+			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
+						 dst_st, dst_reg->region.start);
+
+		src_iter = !cpu_maps_iomem(bo->resource) ?
+			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
+			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
+						 obj->ttm.cached_io_st,
+						 src_reg->region.start);
+
+		ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+	}
+}
+
+static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+			 struct ttm_operation_ctx *ctx,
+			 struct ttm_resource *dst_mem,
+			 struct ttm_place *hop)
+{
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct ttm_resource_manager *dst_man =
+		ttm_manager_type(bo->bdev, dst_mem->mem_type);
+	struct ttm_tt *ttm = bo->ttm;
+	struct sg_table *dst_st;
+	bool clear;
+	int ret;
+
 	/* Sync for now. We could do the actual copy async. */
 	ret = ttm_bo_wait_ctx(bo, ctx);
 	if (ret)
@@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	}

 	/* Populate ttm with pages if needed. Typically system memory. */
-	if (bo->ttm && (dst_man->use_tt ||
-			(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) {
-		ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
+	if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) {
+		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
 		if (ret)
 			return ret;
 	}
@@ -537,23 +560,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	if (IS_ERR(dst_st))
 		return PTR_ERR(dst_st);

-	ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
-	if (ret) {
-		/* If we start mapping GGTT, we can no longer use man::use_tt here. */
-		dst_iter = !cpu_maps_iomem(dst_mem) ?
-			ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
-			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-						 dst_st, dst_reg->region.start);
-
-		src_iter = !cpu_maps_iomem(bo->resource) ?
-			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
-			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-						 obj->ttm.cached_io_st,
-						 src_reg->region.start);
+	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+	if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)))
+		__i915_ttm_move(bo, clear, dst_mem, dst_st);

-		ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
-	}
-
 	/* Below dst_mem becomes bo->resource. */
 	ttm_bo_move_sync_cleanup(bo, dst_mem);
 	i915_ttm_adjust_domains_after_move(obj);
 	i915_ttm_free_cached_io_st(obj);
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-----------
 1 file changed, 40 insertions(+), 30 deletions(-)

[snip]

 	dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
-	if (!ttm || !ttm_tt_is_populated(ttm)) {
+	if (clear) {
 		if (bo->type == ttm_bo_type_kernel)
 			return -EINVAL;
Was that meant to be "return 0;"?
Also does that mean we are incorrectly falling back to memset, for non-userspace objects, instead of making it a noop?
[snip]

+	ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
+	if (ret) {
One future consideration is flat CCS where I don't think we can easily fall back to memcpy for userspace objects. Maybe we can make this fallback conditional on DG1 or !ALLOC_USER for now, or just add a TODO?
[snip]

+	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+	if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)))
Seems quite hard to read?
Reviewed-by: Matthew Auld matthew.auld@intel.com
+		__i915_ttm_move(bo, clear, dst_mem, dst_st);

[snip]
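As an aside on the readability concern: the check quoted above is equivalent to this more verbose form (a sketch only, reusing the same names as the patch):

	bool clear, no_op;

	/* No source data present: clear the destination instead of copying. */
	clear = !cpu_maps_iomem(bo->resource) &&
		(!ttm || !ttm_tt_is_populated(ttm));

	/* Unpopulated pages that were never required to be zeroed:
	 * the whole move is a no-op. */
	no_op = clear && ttm &&
		!(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC);

	if (!no_op)
		__i915_ttm_move(bo, clear, dst_mem, dst_st);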
On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
[snip]

 	dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
-	if (!ttm || !ttm_tt_is_populated(ttm)) {
+	if (clear) {
 		if (bo->type == ttm_bo_type_kernel)
 			return -EINVAL;
Was that meant to be "return 0;"?
Also does that mean we are incorrectly falling back to memset, for non-userspace objects, instead of making it a noop?
No, we're deliberately falling back to memset for non-userspace objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if everything is implemented correctly.
[snip]

+	ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
+	if (ret) {
One future consideration is flat CCS where I don't think we can easily fall back to memcpy for userspace objects. Maybe we can make this fallback conditional on DG1 or !ALLOC_USER for now, or just add a TODO?
Ugh. Is that true for both clearing and copying, or is it only copying?
Problem is, if we hit an engine reset and fence error during initial clearing / swapin, the plan moving forward is to intercept that and resort to CPU clearing / copying for security reasons. In the worst case we at least need to be able to clear.
/Thomas
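A rough sketch of that interception, assuming the blit publishes a dma-fence and the kmap iterators captured for the fallback are still valid when it signals (illustrative only, not from the series):

	/* If the blit fence signals with an error (e.g. after an engine
	 * reset), redo the operation with the CPU so no data can leak. */
	ret = dma_fence_wait(fence, false);
	if (ret || fence->error)
		ttm_move_memcpy(clear, dst_mem->num_pages,
				dst_iter, src_iter);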
On Wed, 30 Jun 2021 at 16:27, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
[snip]
One future consideration is flat CCS where I don't think we can easily fall back to memcpy for userspace objects. Maybe we can make this fallback conditional on DG1 or !ALLOC_USER for now, or just add a TODO?
Ugh. Is that true for both clearing and copying, or is it only copying?
With clearing I think we are required to nuke the aux CCS state using some special blitter command.
For copying/moving I think it's a similar story, where special care might be needed for the aux state, which likely requires the blitter. Although tbh I don't really remember all the details.
On Wed, Jun 30, 2021 at 6:54 PM Matthew Auld matthew.william.auld@gmail.com wrote:
On Wed, 30 Jun 2021 at 16:27, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
[snip]
One future consideration is flat CCS where I don't think we can easily fall back to memcpy for userspace objects. Maybe we can make this fallback conditional on DG1 or !ALLOC_USER for now, or just add a TODO?
Ugh. Is that true for both clearing and copying, or is it only copying?
With clearing I think we are required to nuke the aux CCS state using some special blitter command.
For copying/moving I think it's a similar story, where special care might be needed for the aux state, which likely requires the blitter. Although tbh I don't really remember all the details.
There's more than just flat CCS; for DG2 we'll also support resizeable BAR, with the goal of making the non-mappable lmem available too. Afaik there's no fallback way to access that memory without a copy engine.
I think on those platforms we simply have to go back to wedging the driver if reset of the copy engine fails and one of our kernel contexts was impacted. Nothing much we can really do there. On the big server GPUs we'll have a dedicated copy engine reserved for the kernel, so it's pretty unlikely that one dies, but on DG2 there's only one. -Daniel
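A sketch of that policy; intel_gt_set_wedged() is a real i915 entry point, but the fallback-capability check is hypothetical:

	/* No CPU fallback possible (flat CCS, non-mappable lmem) and the
	 * copy-engine reset failed: nothing left to do but wedge. */
	if (fence->error && !i915_can_cpu_fallback(bo))	/* hypothetical */
		intel_gt_set_wedged(&i915->gt);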
The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers.
The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security-critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time.
Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version.
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c |  2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c       | 20 ++++++++++----------
 include/drm/ttm/ttm_bo_driver.h         |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4e529adcdfc7..f19847abe856 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -517,7 +517,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 						 obj->ttm.cached_io_st,
 						 src_reg->region.start);

-		ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
 	}
 }

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2f57f824e6db..e3747f069674 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -75,22 +75,21 @@ void ttm_mem_io_free(struct ttm_device *bdev,

 /**
  * ttm_move_memcpy - Helper to perform a memcpy ttm move operation.
- * @bo: The struct ttm_buffer_object.
- * @new_mem: The struct ttm_resource we're moving to (copy destination).
- * @new_iter: A struct ttm_kmap_iter representing the destination resource.
+ * @clear: Whether to clear rather than copy.
+ * @num_pages: Number of pages of the operation.
+ * @dst_iter: A struct ttm_kmap_iter representing the destination resource.
  * @src_iter: A struct ttm_kmap_iter representing the source resource.
  *
  * This function is intended to be able to move out async under a
  * dma-fence if desired.
  */
-void ttm_move_memcpy(struct ttm_buffer_object *bo,
+void ttm_move_memcpy(bool clear,
 		     u32 num_pages,
 		     struct ttm_kmap_iter *dst_iter,
 		     struct ttm_kmap_iter *src_iter)
 {
 	const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops;
 	const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
-	struct ttm_tt *ttm = bo->ttm;
 	struct dma_buf_map src_map, dst_map;
 	pgoff_t i;

@@ -99,10 +98,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
 		return;

 	/* Don't move nonexistent data. Clear destination instead. */
-	if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) {
-		if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
-			return;
-
+	if (clear) {
 		for (i = 0; i < num_pages; ++i) {
 			dst_ops->map_local(dst_iter, &dst_map, i);
 			if (dst_map.is_iomem)
@@ -146,6 +142,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 		struct ttm_kmap_iter_linear_io io;
 	} _dst_iter, _src_iter;
 	struct ttm_kmap_iter *dst_iter, *src_iter;
+	bool clear;
 	int ret = 0;

 	if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) ||
@@ -169,7 +166,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 		goto out_src_iter;
 	}

-	ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+	clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm));
+	if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)))
+		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
+
 	src_copy = *src_mem;
 	ttm_bo_move_sync_cleanup(bo, dst_mem);

diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 68d6069572aa..5f087575194b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem);
  */
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo);

-void ttm_move_memcpy(struct ttm_buffer_object *bo,
+void ttm_move_memcpy(bool clear,
 		     u32 num_pages,
 		     struct ttm_kmap_iter *dst_iter,
 		     struct ttm_kmap_iter *src_iter);
On 6/24/21 9:30 PM, Thomas Hellström wrote:
The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers.
The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security- critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time.
Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version.
Hmm,
Not sure where the Cc: Christian König ended up, but in any case, Christian, if you find this patch OK, ack to merge through drm-intel-gt-next?
/Thomas
On 28.06.21 at 13:21, Thomas Hellström wrote:
On 6/24/21 9:30 PM, Thomas Hellström wrote:
[snip]
Hmm,
Not sure where the Cc: Christian König ended up, but in any case Christian if you find this patch ok, Ack to merge through drm_intel_gt_next?
Please send out the patch once more.
Only nitpicks, but I would reorder the parameters, for example.
Regards, Christian.
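One way such a reordering might look (purely illustrative; Christian didn't spell out a specific order):

	void ttm_move_memcpy(struct ttm_kmap_iter *dst_iter,
			     struct ttm_kmap_iter *src_iter,
			     u32 num_pages,
			     bool clear);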
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström thomas.hellstrom@linux.intel.com wrote:
[snip]
Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
Reviewed-by: Matthew Auld matthew.auld@intel.com