This series refactors i915's internal buffer backend to use ttm. It uses ttm's pool allocator to allocate volatile pages in place of the old code which rolled its own via alloc_pages. This is continuing progress to align all backends on using ttm.
Robert Beckett (4): drm/i915: add gen6 ppgtt dummy creation function drm/i915: setup ggtt scratch page after memory regions drm/i915: allow volatile buffers to use ttm pool allocator drm/i915: internal buffers use ttm backend
drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++----------- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 15 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 +- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 + drivers/gpu/drm/i915/i915_driver.c | 16 +- 8 files changed, 201 insertions(+), 180 deletions(-)
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt
Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, };
+static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} + static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM);
- pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, - &pd_dummy_obj_ops, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K); if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL;
Hi,
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt
It's not clear from the commit message why we need a special case for this. Could you describe more in detail?
Thanks, Thomas
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, }; +static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level;
+ GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+ if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG);
+ obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM);
+ drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+ /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj);
+ obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level);
+ return obj; +}
static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->pt.base = __i915_gem_object_create_internal(ppgtt-
base.vm.gt->i915,
&pd_dummy_obj_o ps, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt-
i915, I915_PDES * SZ_4K);
if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL;
On 11/05/2022 11:13, Thomas Hellström wrote:
Hi,
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt
It's not clear from the commit message why we need a special case for this. Could you describe more in detail?
it always was a special case, that used the internal backend but provided it's own ops, so actually had no benefit from using the internal backend. See b0b0f2d225da6fe58417fae37e3f797e2db27b62
I'll add some further explanation in the commit message for v2.
Thanks, Thomas
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, };
+static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level;
+ GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+ if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG);
+ obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM);
+ drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+ /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj);
+ obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level);
+ return obj; +}
static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM);
- pd->pt.base = __i915_gem_object_create_internal(ppgtt-
base.vm.gt->i915,
&pd_dummy_obj_o ps, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt-
i915, I915_PDES * SZ_4K);
if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL;
reorder scratch page allocation so that memory regions are available to allocate the buffers
Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 ++++++ drivers/gpu/drm/i915/i915_driver.c | 16 ++++++++++------ 3 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..5411df1734ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; - u32 pte_flags; - int ret;
GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) }
kref_init(&ggtt->vm.resv_ref); + + return 0; +} + +/** + * i915_ggtt_setup_scratch_page - setup ggtt scratch page + * @i915: i915 device + */ +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + u32 pte_flags; + int ret; + + /* gen5- scratch setup currently happens in @intel_gtt_init */ + if (GRAPHICS_VER(i915) <= 5) + return 0; + ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h index 75ed55c1f30a..c6b79cb78637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915);
/* Stubs for non-x86 platforms */ #else @@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) /* No HW should be enabled for this case yet, return fail */ return -ENODEV; } + +static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + return 0; +} #endif
#endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..f67476b2f349 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_gmch.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h"
@@ -589,12 +590,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
ret = intel_gt_tiles_init(dev_priv); if (ret) - goto err_mem_regions; + goto err_ggtt; + + ret = i915_ggtt_setup_scratch_page(dev_priv); + if (ret) + goto err_ggtt;
ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(&dev_priv->drm, "failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; }
pci_set_master(pdev); @@ -646,11 +651,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); @@ -896,9 +900,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); - intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); i915_gem_drain_freed_objects(i915); + intel_memory_regions_driver_release(i915); i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); @@ -955,9 +959,9 @@ static void i915_driver_release(struct drm_device *dev)
i915_gem_driver_release(dev_priv);
- intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv);
i915_driver_mmio_release(dev_priv);
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
reorder scratch page allocation so that memory regions are available
Nit: s/reorder/Reorder/
Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com
to allocate the buffers
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 ++++++ drivers/gpu/drm/i915/i915_driver.c | 16 ++++++++++------ 3 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..5411df1734ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; - u32 pte_flags; - int ret; GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) } kref_init(&ggtt->vm.resv_ref);
+ return 0; +}
+/**
- i915_ggtt_setup_scratch_page - setup ggtt scratch page
- @i915: i915 device
- */
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + u32 pte_flags; + int ret;
+ /* gen5- scratch setup currently happens in @intel_gtt_init */ + if (GRAPHICS_VER(i915) <= 5) + return 0;
ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h index 75ed55c1f30a..c6b79cb78637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915); /* Stubs for non-x86 platforms */ #else @@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) /* No HW should be enabled for this case yet, return fail */ return -ENODEV; }
+static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + return 0; +} #endif #endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..f67476b2f349 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_gmch.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" @@ -589,12 +590,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) ret = intel_gt_tiles_init(dev_priv); if (ret) - goto err_mem_regions; + goto err_ggtt;
+ ret = i915_ggtt_setup_scratch_page(dev_priv); + if (ret) + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(&dev_priv->drm, "failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; } pci_set_master(pdev); @@ -646,11 +651,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); @@ -896,9 +900,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); - intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); i915_gem_drain_freed_objects(i915); + intel_memory_regions_driver_release(i915); i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); @@ -955,9 +959,9 @@ static void i915_driver_release(struct drm_device *dev) i915_gem_driver_release(dev_priv); - intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); i915_driver_mmio_release(dev_priv);
internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing
Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;
Hi, Bob,
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC; caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;
While this is ok, I think it also needs adjustment in the i915_ttm shrink callback. If someone creates a volatile smem object which then hits the shrinker, I think we might hit asserts that it's a is_shem ttm?
In this case, the shrink callback should just i915_ttm_purge().
/Thomas
On 11/05/2022 13:42, Thomas Hellström wrote:
Hi, Bob,
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;
While this is ok, I think it also needs adjustment in the i915_ttm shrink callback. If someone creates a volatile smem object which then hits the shrinker, I think we might hit asserts that it's a is_shem ttm?
In this case, the shrink callback should just i915_ttm_purge().
agreed. nice catch. I'll fix for v2
looks like we could maybe do with some extra shrinker testing too? looks like nothing caught this during CI testing
/Thomas
refactor internal buffer backend to allocate volatile pages via ttm pool allocator
Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++----------- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */
-#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h> - +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -}
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_place place = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, + }; + struct ttm_placement placement = { + .num_placement = 1, + .placement = &place, + .num_busy_placement = 0, + .busy_placement = NULL, + }; + int ret; + + ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + return ret; } -#endif
- gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; }
-create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; + if (!i915_gem_object_has_pages(obj)) { + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource);
- npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt);
- sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt->table.sgl)); }
- __i915_gem_object_set_pages(obj, st, sg_page_sizes); + GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages)); + i915_ttm_adjust_lru(obj);
return 0; +}
-err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
- return -ENOMEM; -} + .get_pages = i915_internal_get_pages, + .put_pages = i915_ttm_put_pages, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, +};
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- obj->mm.dirty = false; + mutex_destroy(&obj->ttm.get_io_page.lock);
- __start_cpu_write(obj); -} + if (obj->ttm.created) { + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj);
-static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } else { + __i915_gem_object_fini(obj); + } +}
+/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + int ret;
GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, ops, &lock_class, 0); - obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_VOLATILE); + + INIT_LIST_HEAD(&obj->mm.region_link); + + INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock);
- /* - * Mark the object as volatile, such that the pages are marked as - * dontneed whilst they are still pinned. As soon as they are unpinned - * they are allowed to be reaped by the shrinker, and the caller is - * expected to repopulate - the contents of this object are only valid - * whilst active and pinned. - */ - i915_gem_object_set_volatile(obj); + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);
+ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + ttm_bo_type_kernel, i915_ttm_sys_placement(), + 0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + i915_gem_object_free(obj); + return ERR_PTR(ret); + } + + obj->ttm.created = true; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; - + obj->mem_flags &= ~I915_BO_FLAG_IOMEM; + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); + i915_gem_object_unlock(obj);
return obj; }
-/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ -struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size); - #endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void) return &i915_sys_placement; }
-static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) { /* Fastpath */ if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; }
-static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, - struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj, return __i915_ttm_migrate(obj, mr, obj->flags); }
-static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) { /* * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. */ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo);
/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) + if (bo->destroy != i915_ttm_bo_destroy && + bo->destroy != i915_ttm_internal_bo_destroy) { return NULL; + }
return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res);
void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj);
int i915_ttm_purge(struct drm_i915_gem_object *obj);
@@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) /* Once / if we support GGTT, this is also false for cached ttm_tts */ return mem->mem_type != I915_PL_SYSTEM; } + +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err); + #endif
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
refactor internal buffer backend to allocate volatile pages via ttm pool allocator
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++---------
drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */ -#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h>
+#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h"
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg;
- for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg-
length));
- }
- sg_free_table(st); - kfree(st); -} -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp;
- max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment;
- max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_place place = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, + }; + struct ttm_placement placement = { + .num_placement = 1, + .placement = &place, + .num_busy_placement = 0, + .busy_placement = NULL, + }; + int ret;
+ ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + return ret; } -#endif - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32;
It looks like we're losing this restriction?
There is a flag to ttm_device_init() to make TTM only do __GFP_DMA32 allocations.
+ if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; } -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; + if (!i915_gem_object_has_pages(obj)) { + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource); - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt); - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0;
- do { - int order = min(fls(npages) - 1, max_order); - struct page *page;
- do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err;
- /* Limit subsequent allocations as well */ - max_order = order; - } while (1);
- sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++;
- npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - }
- sg = __sg_next(sg); - } while (1);
- if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt-
table.sgl));
} - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo-
ttm->num_pages));
+ i915_ttm_adjust_lru(obj); return 0; +} -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - return -ENOMEM; -} + .get_pages = i915_internal_get_pages, + .put_pages = i915_ttm_put_pages, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, +}; -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - obj->mm.dirty = false; + mutex_destroy(&obj->ttm.get_io_page.lock); - __start_cpu_write(obj); -} + if (obj->ttm.created) { + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj); -static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } else { + __i915_gem_object_fini(obj); + } +} +/**
- i915_gem_object_create_internal: create an object with volatile
pages
- @i915: the i915 device
- @size: the size in bytes of backing storage to allocate for the
object
- Creates a new object that wraps some internal memory for private
use.
- This object is not backed by swappable storage, and as such its
contents
- are volatile and only valid whilst pinned. If the object is
reaped by the
- shrinker, its pages and data will be discarded. Equally, it is
not a full
- GEM object and so not valid for access from userspace. This makes
it useful
- for hardware interfaces like ringbuffers (which are pinned from
the time
- the request is written to the time the hardware stops accessing
it), but
- not for contexts (which need to be preserved when not active for
later
- reuse). Note that it is not cleared upon allocation.
- */
struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + int ret; GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, ops, &lock_class, 0); - obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_VOLATILE);
+ INIT_LIST_HEAD(&obj->mm.region_link);
+ INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock); - /* - * Mark the object as volatile, such that the pages are marked as - * dontneed whilst they are still pinned. As soon as they are unpinned - * they are allowed to be reaped by the shrinker, and the caller is - * expected to repopulate - the contents of this object are only valid - * whilst active and pinned. - */ - i915_gem_object_set_volatile(obj); + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + ttm_bo_type_kernel, i915_ttm_sys_placement(), + 0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + i915_gem_object_free(obj); + return ERR_PTR(ret); + }
+ obj->ttm.created = true; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->mem_flags &= ~I915_BO_FLAG_IOMEM; + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); + i915_gem_object_unlock(obj); return obj; } -/**
- i915_gem_object_create_internal: create an object with volatile
pages
- @i915: the i915 device
- @size: the size in bytes of backing storage to allocate for the
object
- Creates a new object that wraps some internal memory for private
use.
- This object is not backed by swappable storage, and as such its
contents
- are volatile and only valid whilst pinned. If the object is
reaped by the
- shrinker, its pages and data will be discarded. Equally, it is
not a full
- GEM object and so not valid for access from userspace. This makes
it useful
- for hardware interfaces like ringbuffers (which are pinned from
the time
- the request is written to the time the hardware stops accessing
it), but
- not for contexts (which need to be preserved when not active for
later
- reuse). Note that it is not cleared upon allocation.
- */
-struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
While we don't have a TTM shmem backend ready yet for internal,
Did you consider setting up just yet another region, INTEL_REGION_INTERNAL, .class = INTEL_MEMORY_SYSTEM and .instance = 1,
And make it create a TTM system region on integrated, and use same region as INTEL_REGION_SMEM on dgfx.
I think ttm should automatically map that to I915_PL_SYSTEM and the backwards mapping in i915_ttm_region() should never get called since the object is never moved.
Then I figure it should suffice to just call __i915_gem_ttm_object_init() and we could drop a lot of code.
/Thomas
-} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size);
#endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void) return &i915_sys_placement; } -static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) { /* Fastpath */ if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; } -static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, - struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj, return __i915_ttm_migrate(obj, mr, obj->flags); } -static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) { /* * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. */ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo); /** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) + if (bo->destroy != i915_ttm_bo_destroy && + bo->destroy != i915_ttm_internal_bo_destroy) { return NULL; + } return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res); void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj); int i915_ttm_purge(struct drm_i915_gem_object *obj); @@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) /* Once / if we support GGTT, this is also false for cached ttm_tts */ return mem->mem_type != I915_PL_SYSTEM; }
+int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err);
#endif
On 11/05/2022 15:14, Thomas Hellström wrote:
On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:
refactor internal buffer backend to allocate volatile pages via ttm pool allocator
Signed-off-by: Robert Beckett bob.beckett@collabora.com
drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++---------
drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */
-#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h>
+#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h"
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg;
- for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg-
length));
- }
- sg_free_table(st); - kfree(st); -}
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp;
- max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment;
- max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_place place = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, + }; + struct ttm_placement placement = { + .num_placement = 1, + .placement = &place, + .num_busy_placement = 0, + .busy_placement = NULL, + }; + int ret;
+ ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + return ret; } -#endif
- gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32;
It looks like we're losing this restriction?
There is a flag to ttm_device_init() to make TTM only do __GFP_DMA32 allocations.
agreed. will fix for v2
+ if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; }
-create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; + if (!i915_gem_object_has_pages(obj)) { + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource);
- npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt);
- sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0;
- do { - int order = min(fls(npages) - 1, max_order); - struct page *page;
- do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err;
- /* Limit subsequent allocations as well */ - max_order = order; - } while (1);
- sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++;
- npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - }
- sg = __sg_next(sg); - } while (1);
- if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt-
table.sgl));
}
- __i915_gem_object_set_pages(obj, st, sg_page_sizes); + GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo-
ttm->num_pages));
+ i915_ttm_adjust_lru(obj);
return 0; +}
-err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
- return -ENOMEM; -} + .get_pages = i915_internal_get_pages, + .put_pages = i915_ttm_put_pages, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, +};
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- obj->mm.dirty = false; + mutex_destroy(&obj->ttm.get_io_page.lock);
- __start_cpu_write(obj); -} + if (obj->ttm.created) { + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj);
-static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } else { + __i915_gem_object_fini(obj); + } +}
+/**
- i915_gem_object_create_internal: create an object with volatile
pages
- @i915: the i915 device
- @size: the size in bytes of backing storage to allocate for the
object
- Creates a new object that wraps some internal memory for private
use.
- This object is not backed by swappable storage, and as such its
contents
- are volatile and only valid whilst pinned. If the object is
reaped by the
- shrinker, its pages and data will be discarded. Equally, it is
not a full
- GEM object and so not valid for access from userspace. This makes
it useful
- for hardware interfaces like ringbuffers (which are pinned from
the time
- the request is written to the time the hardware stops accessing
it), but
- not for contexts (which need to be preserved when not active for
later
- reuse). Note that it is not cleared upon allocation.
- */
struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + int ret;
GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, ops, &lock_class, 0); - obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_VOLATILE);
+ INIT_LIST_HEAD(&obj->mm.region_link);
+ INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock);
- /* - * Mark the object as volatile, such that the pages are marked as - * dontneed whilst they are still pinned. As soon as they are unpinned - * they are allowed to be reaped by the shrinker, and the caller is - * expected to repopulate - the contents of this object are only valid - * whilst active and pinned. - */ - i915_gem_object_set_volatile(obj); + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);
+ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + ttm_bo_type_kernel, i915_ttm_sys_placement(), + 0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + i915_gem_object_free(obj); + return ERR_PTR(ret); + }
+ obj->ttm.created = true; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->mem_flags &= ~I915_BO_FLAG_IOMEM; + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); + i915_gem_object_unlock(obj);
return obj; }
-/**
- i915_gem_object_create_internal: create an object with volatile
pages
- @i915: the i915 device
- @size: the size in bytes of backing storage to allocate for the
object
- Creates a new object that wraps some internal memory for private
use.
- This object is not backed by swappable storage, and as such its
contents
- are volatile and only valid whilst pinned. If the object is
reaped by the
- shrinker, its pages and data will be discarded. Equally, it is
not a full
- GEM object and so not valid for access from userspace. This makes
it useful
- for hardware interfaces like ringbuffers (which are pinned from
the time
- the request is written to the time the hardware stops accessing
it), but
- not for contexts (which need to be preserved when not active for
later
- reuse). Note that it is not cleared upon allocation.
- */
-struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
While we don't have a TTM shmem backend ready yet for internal,
Did you consider setting up just yet another region, INTEL_REGION_INTERNAL, .class = INTEL_MEMORY_SYSTEM and .instance = 1,
And make it create a TTM system region on integrated, and use same region as INTEL_REGION_SMEM on dgfx.
I think ttm should automatically map that to I915_PL_SYSTEM and the backwards mapping in i915_ttm_region() should never get called since the object is never moved.
Then I figure it should suffice to just call __i915_gem_ttm_object_init() and we could drop a lot of code.
i briefly considered using a new fake region, but with current precedent mapping memory regions to real segemented memory areas I considered it an abuse of the semantics of memory regions.
If we are happy to have fake regions, I can revert it back to a previous design of using system region for discreet and add a fake region setup for integrated.
Would this be preferred over the current design?
/Thomas
-} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size);
#endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void) return &i915_sys_placement; }
-static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) { /* Fastpath */ if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; }
-static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, - struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj, return __i915_ttm_migrate(obj, mr, obj->flags); }
-static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) { /* * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. */ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo);
/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) + if (bo->destroy != i915_ttm_bo_destroy && + bo->destroy != i915_ttm_internal_bo_destroy) { return NULL; + }
return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res);
void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj);
int i915_ttm_purge(struct drm_i915_gem_object *obj);
@@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) /* Once / if we support GGTT, this is also false for cached ttm_tts */ return mem->mem_type != I915_PL_SYSTEM; }
+int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err);
#endif
dri-devel@lists.freedesktop.org