On 1/26/22 16:21, Matthew Auld wrote:
If the user doesn't require CPU access for the buffer, then ALLOC_TOPDOWN should be used, in order to prioritise allocating in the non-mappable portion of LMEM.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
I was wondering how this would work best with user-space not supplying any hints. Thinking that mappable LMEM buffers would be a minority, wouldn't it be better to have TOPDOWN behaviour set by default. It would then be migrated to mappable only if needed. And if the first usage is a cpu-map it would either be mapped in system or immediately migrated from pageless to mappable LMEM?
drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 15 +++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_region.c | 5 +++++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 ++++++++++--- drivers/gpu/drm/i915/gt/intel_gt.c | 4 +++- drivers/gpu/drm/i915/i915_vma.c | 3 +++ drivers/gpu/drm/i915/intel_region_ttm.c | 11 ++++++++--- drivers/gpu/drm/i915/selftests/mock_region.c | 7 +------ 8 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 71e778ecaeb8..29285aaf0477 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -319,15 +319,22 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_PM_VOLATILE BIT(4) /* Object needs to be restored early using memcpy during resume */ #define I915_BO_ALLOC_PM_EARLY BIT(5) +/*
- Object is likely never accessed by the CPU. This will prioritise the BO to be
- allocated in the non-mappable portion of lmem. This is merely a hint, and if
- dealing with userspace objects the CPU fault handler is free to ignore this.
- */
+#define I915_BO_ALLOC_TOPDOWN BIT(6) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \
I915_BO_ALLOC_PM_EARLY)
-#define I915_BO_READONLY BIT(6) -#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(8)
I915_BO_ALLOC_PM_EARLY | \
I915_BO_ALLOC_TOPDOWN)
+#define I915_BO_READONLY BIT(7) +#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(9) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 7d2211fbe548..a95b4d72619f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -346,6 +346,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO);
if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_TOPDOWN))
return ERR_PTR(-EINVAL);
assert_object_held(obj);
pinned = !(type & I915_MAP_OVERRIDE);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index a4350227e9ae..f91e5a9c759d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -45,6 +45,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
- if (WARN_ON_ONCE(flags & I915_BO_ALLOC_TOPDOWN &&
(flags & I915_BO_ALLOC_CPU_CLEAR ||
flags & I915_BO_ALLOC_PM_EARLY)))
return ERR_PTR(-EINVAL);
- if (!mem) return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index d9a04c7d41b1..e60b677ecd54 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -127,10 +127,14 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr, place->mem_type = intel_region_to_ttm_type(mr);
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place->flags = TTM_PL_FLAG_CONTIGUOUS;
if (mr->io_size && mr->io_size < mr->total) {place->flags |= TTM_PL_FLAG_CONTIGUOUS;
place->fpfn = 0;
place->lpfn = mr->io_size >> PAGE_SHIFT;
if (flags & I915_BO_ALLOC_TOPDOWN) {
place->flags |= TTM_PL_FLAG_TOPDOWN;
} else {
place->fpfn = 0;
place->lpfn = mr->io_size >> PAGE_SHIFT;
} }}
@@ -890,6 +894,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) if (!obj) return VM_FAULT_SIGBUS;
- if (obj->flags & I915_BO_ALLOC_TOPDOWN)
return -EINVAL;
- /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 622cdfed8a8b..8b83a771a2f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -454,7 +454,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) struct i915_vma *vma; int ret;
- obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
- obj = i915_gem_object_create_lmem(i915, size,
I915_BO_ALLOC_VOLATILE |
if (IS_ERR(obj)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj))I915_BO_ALLOC_TOPDOWN);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b1816a835abf..b2fdaa74e4b6 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -528,6 +528,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void __iomem *ptr; int err;
- if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_TOPDOWN))
return IO_ERR_PTR(-EINVAL);
- if (!i915_gem_object_is_lmem(vma->obj)) { if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV;
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 4689192d5e8d..282802aed174 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -199,13 +199,18 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret;
- if (flags & I915_BO_ALLOC_CONTIGUOUS)
if (mem->io_size && mem->io_size < mem->total) {place.flags |= TTM_PL_FLAG_CONTIGUOUS;
place.fpfn = 0;
place.lpfn = mem->io_size >> PAGE_SHIFT;
if (flags & I915_BO_ALLOC_TOPDOWN) {
place.flags |= TTM_PL_FLAG_TOPDOWN;
} else {
place.fpfn = 0;
place.lpfn = mem->io_size >> PAGE_SHIFT;
}
}
mock_bo.base.size = size;
place.flags = flags;
ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC)
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 467eeae6d5f0..f64325491f35 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -22,17 +22,12 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj,
static int mock_region_get_pages(struct drm_i915_gem_object *obj) {
unsigned int flags; struct sg_table *pages; int err;
flags = 0;
if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
flags |= TTM_PL_FLAG_CONTIGUOUS;
obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, obj->base.size,
flags);
if (IS_ERR(obj->mm.res)) return PTR_ERR(obj->mm.res);obj->flags);