The leftover bits around dealing with stolen-local memory + small BAR, plus some related fixes.
Just pass along the probed io_size. The backend should be able to utilize the entire range here, even if some of it is non-mappable.
It does leave open with what to do with stolen local-memory.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 6cecfdae07ad..783d81072c3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -93,6 +93,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) struct intel_memory_region *mem; resource_size_t min_page_size; resource_size_t io_start; + resource_size_t io_size; resource_size_t lmem_size; int err;
@@ -124,7 +125,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
io_start = pci_resource_start(pdev, 2); - if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) + io_size = min(pci_resource_len(pdev, 2), lmem_size); + if (!io_size) return ERR_PTR(-ENODEV);
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : @@ -134,7 +136,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size, min_page_size, io_start, - lmem_size, + io_size, INTEL_MEMORY_LOCAL, 0, &intel_region_lmem_ops);
From: Akeem G Abodunrin akeem.g.abodunrin@intel.com
On client platforms with reduced LMEM BAR, we should be able to continue with driver load with reduced io_size. Instead of using the BAR size to determine the how large stolen should be, we should instead use the ADDR_RANGE register to figure this out(at least on platforms like DG2). For simplicity we don't attempt to support partially mappable stolen.
Signed-off-by: Akeem G Abodunrin akeem.g.abodunrin@intel.com Co-developed-by: Matthew Auld matthew.auld@intel.com Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 49 ++++++++++++++++------ drivers/gpu/drm/i915/i915_reg.h | 3 ++ 2 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 0bf8f61134af..c9ad4f8c4eaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -12,6 +12,8 @@
#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" +#include "gt/intel_gt.h" +#include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" #include "i915_reg.h" @@ -750,9 +752,9 @@ static int init_stolen_lmem(struct intel_memory_region *mem) if (GEM_WARN_ON(resource_size(&mem->region) == 0)) return -ENODEV;
- if (!io_mapping_init_wc(&mem->iomap, - mem->io_start, - mem->io_size)) + if (mem->io_size && !io_mapping_init_wc(&mem->iomap, + mem->io_start, + mem->io_size)) return -EIO;
/* @@ -773,7 +775,8 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
static int release_stolen_lmem(struct intel_memory_region *mem) { - io_mapping_fini(&mem->iomap); + if (mem->io_size) + io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); return 0; } @@ -790,25 +793,44 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, { struct intel_uncore *uncore = &i915->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + resource_size_t dsm_size, dsm_base, lmem_size; struct intel_memory_region *mem; + resource_size_t io_start, io_size; resource_size_t min_page_size; - resource_size_t io_start; - resource_size_t lmem_size; - u64 lmem_base;
- lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE); - if (GEM_WARN_ON(lmem_base >= pci_resource_len(pdev, 2))) + if (WARN_ON_ONCE(instance)) return ERR_PTR(-ENODEV);
- lmem_size = pci_resource_len(pdev, 2) - lmem_base; - io_start = pci_resource_start(pdev, 2) + lmem_base; + /* Use DSM base address instead for stolen memory */ + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); + if (IS_DG1(uncore->i915)) { + lmem_size = pci_resource_len(pdev, 2); + } else { + resource_size_t lmem_range; + + lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_size *= SZ_1G; + } + + dsm_size = lmem_size - dsm_base; + if (pci_resource_len(pdev, 2) < lmem_size) { + if (GEM_WARN_ON(IS_DG1(uncore->i915))) + return ERR_PTR(-ENODEV); + + io_start = 0; + io_size = 0; + } else { + io_start = pci_resource_start(pdev, 2) + dsm_base; + io_size = dsm_size; + }
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K;
- mem = intel_memory_region_create(i915, lmem_base, lmem_size, + mem = intel_memory_region_create(i915, dsm_base, dsm_size, min_page_size, - io_start, lmem_size, + io_start, io_size, type, instance, &i915_region_stolen_lmem_ops); if (IS_ERR(mem)) @@ -822,6 +844,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", &mem->io_start); + drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
intel_memory_region_set_name(mem, "stolen-local");
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 70484f6f2b8b..8ce2eaa002fa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8466,6 +8466,9 @@ enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13)
+#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 + #define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) #define XEHPSDV_CCS_BASE_SHIFT 8
Add a generic interface for allocating an object at some specific offset, and convert stolen over. Later we will want to hook this up to different backends.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- .../drm/i915/display/intel_plane_initial.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_region.c | 42 ++++++++-- drivers/gpu/drm/i915/gem/i915_gem_region.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 82 ++++++------------- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 4 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + drivers/gpu/drm/i915/gt/intel_rc6.c | 8 +- drivers/gpu/drm/i915/intel_memory_region.h | 1 + drivers/gpu/drm/i915/selftests/mock_region.c | 1 + 12 files changed, 80 insertions(+), 74 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index e207d12286b5..5227e5b35206 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */
+#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "intel_atomic_plane.h" #include "intel_display.h" @@ -69,7 +70,8 @@ initial_plane_vma(struct drm_i915_private *i915, size * 2 > i915->stolen_usable_size) return NULL;
- obj = i915_gem_object_create_stolen_for_preallocated(i915, base, size); + obj = i915_gem_object_create_region_at(i915->mm.stolen_region, + base, size, 0); if (IS_ERR(obj)) return NULL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index c6eb023d3d86..5802692ea604 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -123,7 +123,7 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size, */ flags = I915_BO_ALLOC_USER;
- ret = mr->ops->init_object(mr, obj, size, 0, flags); + ret = mr->ops->init_object(mr, obj, I915_BO_INVALID_OFFSET, size, 0, flags); if (ret) goto object_free;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 6cf94469d5a8..460a6924e611 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -27,11 +27,12 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) mutex_unlock(&mem->objects.lock); }
-struct drm_i915_gem_object * -i915_gem_object_create_region(struct intel_memory_region *mem, - resource_size_t size, - resource_size_t page_size, - unsigned int flags) +static struct drm_i915_gem_object * +__i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) { struct drm_i915_gem_object *obj; resource_size_t default_page_size; @@ -83,7 +84,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (default_page_size < mem->min_page_size) flags |= I915_BO_ALLOC_PM_EARLY;
- err = mem->ops->init_object(mem, obj, size, page_size, flags); + err = mem->ops->init_object(mem, obj, offset, size, page_size, flags); if (err) goto err_object_free;
@@ -95,6 +96,35 @@ i915_gem_object_create_region(struct intel_memory_region *mem, return ERR_PTR(err); }
+struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + return __i915_gem_object_create_region(mem, I915_BO_INVALID_OFFSET, + size, page_size, flags); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region_at(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + unsigned int flags) +{ + GEM_BUG_ON(offset == I915_BO_INVALID_OFFSET); + + if (GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) || + GEM_WARN_ON(!IS_ALIGNED(offset, mem->min_page_size))) + return ERR_PTR(-EINVAL); + + if (range_overflows(offset, size, resource_size(&mem->region))) + return ERR_PTR(-EINVAL); + + return __i915_gem_object_create_region(mem, offset, size, 0, + flags | I915_BO_ALLOC_CONTIGUOUS); +} + /** * i915_gem_process_region - Iterate over all objects of a region using ops * to process and optionally skip objects diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index fcaa12d657d4..2dfcc41c0170 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -14,6 +14,8 @@ struct sg_table;
struct i915_gem_apply_to_region;
+#define I915_BO_INVALID_OFFSET ((resource_size_t)-1) + /** * struct i915_gem_apply_to_region_ops - ops to use when iterating over all * region objects. @@ -56,6 +58,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, resource_size_t page_size, unsigned int flags); +struct drm_i915_gem_object * +i915_gem_object_create_region_at(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + unsigned int flags);
int i915_gem_process_region(struct intel_memory_region *mr, struct i915_gem_apply_to_region *apply); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 3a1c782ed791..9e5faf0bdd4e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -552,6 +552,7 @@ static int __create_shmem(struct drm_i915_private *i915,
static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index c9ad4f8c4eaf..b917ded21028 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -681,6 +681,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) @@ -695,12 +696,32 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, if (size == 0) return -EINVAL;
+ /* + * With discrete devices, where we lack a mappable aperture there is no + * possible way to ever access this memory on the CPU side. + */ + if (mem->type == INTEL_MEMORY_STOLEN_LOCAL && !mem->io_size && + !(flags & I915_BO_ALLOC_GPU_ONLY)) + return -ENOSPC; + stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) return -ENOMEM;
- ret = i915_gem_stolen_insert_node(i915, stolen, size, - mem->min_page_size); + if (offset != I915_BO_INVALID_OFFSET) { + drm_dbg(&i915->drm, + "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n", + &offset, &size); + + stolen->start = offset; + stolen->size = size; + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); + mutex_unlock(&i915->mm.stolen_lock); + } else { + ret = i915_gem_stolen_insert_node(i915, stolen, size, + mem->min_page_size); + } if (ret) goto err_free;
@@ -873,63 +894,6 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, return mem; }
-struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, - resource_size_t stolen_offset, - resource_size_t size) -{ - struct intel_memory_region *mem = i915->mm.stolen_region; - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - int ret; - - if (!drm_mm_initialized(&i915->mm.stolen)) - return ERR_PTR(-ENODEV); - - drm_dbg(&i915->drm, - "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n", - &stolen_offset, &size); - - /* KISS and expect everything to be page-aligned */ - if (GEM_WARN_ON(size == 0) || - GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) || - GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size))) - return ERR_PTR(-EINVAL); - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return ERR_PTR(-ENOMEM); - - stolen->start = stolen_offset; - stolen->size = size; - mutex_lock(&i915->mm.stolen_lock); - ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); - mutex_unlock(&i915->mm.stolen_lock); - if (ret) - goto err_free; - - obj = i915_gem_object_alloc(); - if (!obj) { - ret = -ENOMEM; - goto err_stolen; - } - - ret = __i915_gem_object_create_stolen(mem, obj, stolen); - if (ret) - goto err_object_free; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - return obj; - -err_object_free: - i915_gem_object_free(obj); -err_stolen: - i915_gem_stolen_remove_node(i915, stolen); -err_free: - kfree(stolen); - return ERR_PTR(ret); -} - bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj) { return obj->ops == &i915_gem_object_stolen_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index ccdf7befc571..d5005a39d130 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -31,10 +31,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, resource_size_t size); -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - resource_size_t stolen_offset, - resource_size_t size);
bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 45cc5837ce00..5e543ed867a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1142,6 +1142,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) */ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 9d698ad00853..73e371aa3850 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -45,6 +45,7 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo)
int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 63db136cbc27..b4770690e794 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -6,6 +6,7 @@ #include <linux/pm_runtime.h> #include <linux/string_helpers.h>
+#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_reg.h" #include "i915_vgpu.h" @@ -325,9 +326,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) resource_size_t pcbr_offset;
pcbr_offset = (pcbr & ~4095) - i915->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(i915, - pcbr_offset, - pctx_size); + pctx = i915_gem_object_create_region_at(i915->mm.stolen_region, + pcbr_offset, + pctx_size, + 0); if (IS_ERR(pctx)) return PTR_ERR(pctx);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 21dcbd620758..56f266020285 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -54,6 +54,7 @@ struct intel_memory_region_ops {
int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index f64325491f35..f16c0b7198c7 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -57,6 +57,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
static int mock_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags)
If this is an actual range allocation, and not some bias thing then the resultant allocation will already be naturally contiguous without needing any power-of-two rounding.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 129f668f21ff..8e4e3f72c1ef 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -71,7 +71,8 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
GEM_BUG_ON(min_page_size < mm->chunk_size);
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (place->fpfn + bman_res->base.num_pages != place->lpfn && + place->flags & TTM_PL_FLAG_CONTIGUOUS) { unsigned long pages;
size = roundup_pow_of_two(size);
For the ttm backend we can use existing placements fpfn and lpfn to force the allocator to place the object at the requested offset, potentially evicting stuff if the spot is currently occupied.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 18 ++++++++++++++---- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++++++- drivers/gpu/drm/i915/intel_region_ttm.h | 1 + drivers/gpu/drm/i915/selftests/mock_region.c | 3 +++ 5 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index fd54eb8f4826..2c88bdb8ff7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -631,6 +631,8 @@ struct drm_i915_gem_object {
struct drm_mm_node *stolen;
+ resource_size_t bo_offset; + unsigned long scratch; u64 encode;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5e543ed867a2..e4a06fcf741a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -126,6 +126,8 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) static void i915_ttm_place_from_region(const struct intel_memory_region *mr, struct ttm_place *place, + resource_size_t offset, + resource_size_t size, unsigned int flags) { memset(place, 0, sizeof(*place)); @@ -133,7 +135,10 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr,
if (flags & I915_BO_ALLOC_CONTIGUOUS) place->flags |= TTM_PL_FLAG_CONTIGUOUS; - if (mr->io_size && mr->io_size < mr->total) { + if (offset != I915_BO_INVALID_OFFSET) { + place->fpfn = offset >> PAGE_SHIFT; + place->lpfn = place->fpfn + (size >> PAGE_SHIFT); + } else if (mr->io_size && mr->io_size < mr->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place->flags |= TTM_PL_FLAG_TOPDOWN; } else { @@ -155,12 +160,14 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
placement->num_placement = 1; i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : - obj->mm.region, requested, flags); + obj->mm.region, requested, obj->bo_offset, + obj->base.size, flags);
/* Cache this on object? */ placement->num_busy_placement = num_allowed; for (i = 0; i < placement->num_busy_placement; ++i) - i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, + obj->bo_offset, obj->base.size, flags);
if (num_allowed == 0) { *busy = *requested; @@ -802,7 +809,8 @@ static int __i915_ttm_migrate(struct drm_i915_gem_object *obj, struct ttm_placement placement; int ret;
- i915_ttm_place_from_region(mr, &requested, flags); + i915_ttm_place_from_region(mr, &requested, obj->bo_offset, + obj->base.size, flags); placement.num_placement = 1; placement.num_busy_placement = 1; placement.placement = &requested; @@ -1159,6 +1167,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
+ obj->bo_offset = offset; + /* Don't put on a region list until we're either locked or fully initialized. */ obj->mm.region = mem; INIT_LIST_HEAD(&obj->mm.region_link); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 737ef3f4ab54..62ff77445b01 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -12,6 +12,7 @@
#include "intel_region_ttm.h"
+#include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */ /** * DOC: TTM support structure @@ -191,6 +192,7 @@ intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, */ struct ttm_resource * intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t offset, resource_size_t size, unsigned int flags) { @@ -202,7 +204,10 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
if (flags & I915_BO_ALLOC_CONTIGUOUS) place.flags |= TTM_PL_FLAG_CONTIGUOUS; - if (mem->io_size && mem->io_size < mem->total) { + if (offset != I915_BO_INVALID_OFFSET) { + place.fpfn = offset >> PAGE_SHIFT; + place.lpfn = place.fpfn + (size >> PAGE_SHIFT); + } else if (mem->io_size && mem->io_size < mem->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place.flags |= TTM_PL_FLAG_TOPDOWN; } else { diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index fdee5e7bd46c..cf9d86dcf409 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -36,6 +36,7 @@ struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST struct ttm_resource * intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t offset, resource_size_t size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index f16c0b7198c7..670557ce1024 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -26,6 +26,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) int err;
obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->bo_offset, obj->base.size, obj->flags); if (IS_ERR(obj->mm.res)) @@ -71,6 +72,8 @@ static int mock_object_init(struct intel_memory_region *mem, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class, flags);
+ obj->bo_offset = offset; + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
From: CQ Tang cq.tang@intel.com
When system does not have mappable aperture, ggtt->mappable_end=0. In this case if we pass PIN_MAPPABLE when pinning vma, the pinning code will return -ENOSPC. So conditionally set PIN_MAPPABLE if HAS_GMCH().
Suggested-by: Chris P Wilson chris.p.wilson@intel.com Signed-off-by: CQ Tang cq.tang@intel.com Cc: Radhakrishna Sripada radhakrishna.sripada@intel.com Cc: Ap Kamal kamal.ap@intel.com Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/display/intel_plane_initial.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 5227e5b35206..f797fcef18fc 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -51,6 +51,7 @@ initial_plane_vma(struct drm_i915_private *i915, struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 base, size; + u64 pinctl;
if (!mem || plane_config->size == 0) return NULL; @@ -101,7 +102,10 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(vma)) goto err_obj;
- if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) + pinctl = PIN_GLOBAL | PIN_OFFSET_FIXED | base; + if (HAS_GMCH(i915)) + pinctl |= PIN_MAPPABLE; + if (i915_vma_pin(vma, 0, 0, pinctl)) goto err_obj;
if (i915_gem_object_is_tiled(obj) &&
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- .../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base, - I915_GTT_MIN_ALIGNMENT); - size = round_up(plane_config->base + plane_config->size, - mem->min_page_size); + base = plane_config->base; + if (IS_DGFX(i915)) { + /* + * On discrete the base address should be somewhere in LMEM, but + * depending on the size of LMEM the base address might + * intersect with the start of DSM, like on DG1, in which case + * we need the relative address. In such cases we might also + * need to choose between inital fb vs fbc, if space is limited. + * + * On future discrete HW, like DG2, we should be able to just + * allocate directly from LMEM, due to larger LMEM size. + */ + if (base >= i915->dsm.start) + base -= i915->dsm.start; + } + + size = roundup(base + plane_config->size, mem->min_page_size); + base = round_down(base, I915_GTT_MIN_ALIGNMENT); size -= base;
/*
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
- size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
- base = plane_config->base;
- if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
}
size = roundup(base + plane_config->size, mem->min_page_size);
base = round_down(base, I915_GTT_MIN_ALIGNMENT); size -= base;
/*
-- 2.34.1
On 04/03/2022 19:33, Ville Syrjälä wrote:
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
- size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
- base = plane_config->base;
- if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
Thanks for taking a look at this. Is that subsequent code outside initial_plane_vma()? In the next patch this is now using LMEM directly for dg2. Would that blow up somewhere else?
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
Which PTEs? Is this for the below GGTT bind? I would have assumed that the create_at/for_preallocated would simply refuse to allocate the pages if the requested range is outside the regions usable range? Or maybe there is more going on behind the scenes here?
}
size = roundup(base + plane_config->size, mem->min_page_size);
base = round_down(base, I915_GTT_MIN_ALIGNMENT); size -= base;
/*
-- 2.34.1
On Mon, Mar 07, 2022 at 10:32:36AM +0000, Matthew Auld wrote:
On 04/03/2022 19:33, Ville Syrjälä wrote:
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
- size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
- base = plane_config->base;
- if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
Thanks for taking a look at this. Is that subsequent code outside initial_plane_vma()? In the next patch this is now using LMEM directly for dg2. Would that blow up somewhere else?
It uses i915_gem_object_create_stolen_for_preallocated() which assumes the stuff is inside stolen.
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
Which PTEs?
The PTEs the plane is actually using. We have no idea where they actually point to and just assume they represent a 1:1 mapping of stolen.
I suppose with lmem we'll just start assuming a 1:1 mapping of the whole lmem rather than just stolen.
On 07/03/2022 17:06, Ville Syrjälä wrote:
On Mon, Mar 07, 2022 at 10:32:36AM +0000, Matthew Auld wrote:
On 04/03/2022 19:33, Ville Syrjälä wrote:
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
- size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
- base = plane_config->base;
- if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
Thanks for taking a look at this. Is that subsequent code outside initial_plane_vma()? In the next patch this is now using LMEM directly for dg2. Would that blow up somewhere else?
It uses i915_gem_object_create_stolen_for_preallocated() which assumes the stuff is inside stolen.
At the start of the series that gets ripped out and replaced with i915_gem_object_create_region_at(), where we can now just pass in the intel_memory_region, and the backend hopefully takes care of the rest.
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
Which PTEs?
The PTEs the plane is actually using. We have no idea where they actually point to and just assume they represent a 1:1 mapping of stolen.
I suppose with lmem we'll just start assuming a 1:1 mapping of the whole lmem rather than just stolen.
So IIUC the base that we read is actually some GGTT address(I guess it comes pre-programmed or something?), and that hopefully 1:1 maps to stolen. Ok, so as you say, I guess we only want to subtract the dsm.start for the physical allocation, and not the GGTT address, when dealing with stolen lmem.
On Mon, Mar 07, 2022 at 06:26:32PM +0000, Matthew Auld wrote:
On 07/03/2022 17:06, Ville Syrjälä wrote:
On Mon, Mar 07, 2022 at 10:32:36AM +0000, Matthew Auld wrote:
On 04/03/2022 19:33, Ville Syrjälä wrote:
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
- base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
- size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
- base = plane_config->base;
- if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
Thanks for taking a look at this. Is that subsequent code outside initial_plane_vma()? In the next patch this is now using LMEM directly for dg2. Would that blow up somewhere else?
It uses i915_gem_object_create_stolen_for_preallocated() which assumes the stuff is inside stolen.
At the start of the series that gets ripped out and replaced with i915_gem_object_create_region_at(), where we can now just pass in the intel_memory_region, and the backend hopefully takes care of the rest.
Why? Is the BIOS no longer allocating its fbs from stolen?
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
Which PTEs?
The PTEs the plane is actually using. We have no idea where they actually point to and just assume they represent a 1:1 mapping of stolen.
I suppose with lmem we'll just start assuming a 1:1 mapping of the whole lmem rather than just stolen.
So IIUC the base that we read is actually some GGTT address(I guess it comes pre-programmed or something?), and that hopefully 1:1 maps to stolen. Ok, so as you say, I guess we only want to subtract the dsm.start for the physical allocation, and not the GGTT address, when dealing with stolen lmem.
On Mon, 7 Mar 2022 at 18:41, Ville Syrjälä ville.syrjala@linux.intel.com wrote:
On Mon, Mar 07, 2022 at 06:26:32PM +0000, Matthew Auld wrote:
On 07/03/2022 17:06, Ville Syrjälä wrote:
On Mon, Mar 07, 2022 at 10:32:36AM +0000, Matthew Auld wrote:
On 04/03/2022 19:33, Ville Syrjälä wrote:
On Fri, Mar 04, 2022 at 05:23:32PM +0000, Matthew Auld wrote:
The offset we get looks to be the exact start of DSM, but the inital_plane_vma expects the address to be relative.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com
.../drm/i915/display/intel_plane_initial.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index f797fcef18fc..b39d3a8dfe45 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915, if (!mem || plane_config->size == 0) return NULL;
base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
base = plane_config->base;
if (IS_DGFX(i915)) {
/*
* On discrete the base address should be somewhere in LMEM, but
* depending on the size of LMEM the base address might
* intersect with the start of DSM, like on DG1, in which case
* we need the relative address. In such cases we might also
* need to choose between inital fb vs fbc, if space is limited.
*
* On future discrete HW, like DG2, we should be able to just
* allocate directly from LMEM, due to larger LMEM size.
*/
if (base >= i915->dsm.start)
base -= i915->dsm.start;
Subsequent code expects the object to actually be inside stolen. If that is not the case we should just give up.
Thanks for taking a look at this. Is that subsequent code outside initial_plane_vma()? In the next patch this is now using LMEM directly for dg2. Would that blow up somewhere else?
It uses i915_gem_object_create_stolen_for_preallocated() which assumes the stuff is inside stolen.
At the start of the series that gets ripped out and replaced with i915_gem_object_create_region_at(), where we can now just pass in the intel_memory_region, and the backend hopefully takes care of the rest.
Why? Is the BIOS no longer allocating its fbs from stolen?
On discrete, so far DSM is always just snipped off the end of lmem. On DG1, which only has 4G lmem, the base seems to always exactly match the DSM start(not sure if this is a fluke). However on DG2, which has much larger lmem size, the base is still the same IIRC, but it isn't even close to where DSM is located on such a device. Best guess is that we were meant to just treat the bios fb(or that part of stolen lmem) as a part of normal lmem, and might explain why the base is not relative to the dsm.start like on integrated?
The fact that we fail to confirm any of that on integrated parts has always bugged me, but not enough to actually do anything about it. Such a check would be somewhat more involved since we'd have to look at the PTEs. But on discrete sounds like we can get away with a trivial check.
Which PTEs?
The PTEs the plane is actually using. We have no idea where they actually point to and just assume they represent a 1:1 mapping of stolen.
I suppose with lmem we'll just start assuming a 1:1 mapping of the whole lmem rather than just stolen.
So IIUC the base that we read is actually some GGTT address(I guess it comes pre-programmed or something?), and that hopefully 1:1 maps to stolen. Ok, so as you say, I guess we only want to subtract the dsm.start for the physical allocation, and not the GGTT address, when dealing with stolen lmem.
-- Ville Syrjälä Intel
On DG2+ the initial fb shouldn't be placed anywhere close to DSM, and so should just be allocated directly from LMEM.
Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/display/intel_plane_initial.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index b39d3a8dfe45..5a3baeb620a6 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -68,8 +68,12 @@ initial_plane_vma(struct drm_i915_private *i915, * On future discrete HW, like DG2, we should be able to just * allocate directly from LMEM, due to larger LMEM size. */ - if (base >= i915->dsm.start) + if (base >= i915->dsm.start) { base -= i915->dsm.start; + } else { + WARN_ON_ONCE(IS_DG1(i915)); + mem = i915->mm.regions[INTEL_REGION_LMEM]; + } }
size = roundup(base + plane_config->size, mem->min_page_size); @@ -82,11 +86,11 @@ initial_plane_vma(struct drm_i915_private *i915, * features. */ if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) && + mem == i915->mm.stolen_region && size * 2 > i915->stolen_usable_size) return NULL;
- obj = i915_gem_object_create_region_at(i915->mm.stolen_region, - base, size, 0); + obj = i915_gem_object_create_region_at(mem, base, size, 0); if (IS_ERR(obj)) return NULL;
dri-devel@lists.freedesktop.org