[PATCH 0/4] ttm for internal

List overview All Threads
Download

newer

older

[PATCH v1 1/2] Revert "drm/bridge:...

[PATCH 01/11] drm: of: Add...

Robert Beckett

3 May 2022 3 May '22

7:13 p.m.

This series refactors i915's internal buffer backend to use ttm. It uses ttm's pool allocator to allocate volatile pages in place of the old code which rolled its own via alloc_pages. This is continuing progress to align all backends on using ttm.

Robert Beckett (4): drm/i915: add gen6 ppgtt dummy creation function drm/i915: setup ggtt scratch page after memory regions drm/i915: allow volatile buffers to use ttm pool allocator drm/i915: internal buffers use ttm backend

drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++----------- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 15 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 +- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 + drivers/gpu/drm/i915/i915_driver.c | 16 +- 8 files changed, 201 insertions(+), 180 deletions(-)

-- 2.25.1

Show replies by date

Robert Beckett

3 May 3 May

7:13 p.m.

New subject: [PATCH 1/4] drm/i915: add gen6 ppgtt dummy creation function

Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt

Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, };

+static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} + static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM);

- pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, - &pd_dummy_obj_ops, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K); if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL;

-- 2.25.1

Thomas Hellström

11 May 11 May

10:13 a.m.

New subject: [PATCH 1/4] drm/i915: add gen6 ppgtt dummy creation function

Hi,

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...

Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt

It's not clear from the commit message why we need a special case for this. Could you describe more in detail?

Thanks, Thomas

...

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {         .put_pages = pd_dummy_obj_put_pages, }; +static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ +       static struct lock_class_key lock_class; +       struct drm_i915_gem_object *obj; +       unsigned int cache_level;

+       GEM_BUG_ON(!size); +       GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

+       if (overflows_type(size, obj->base.size)) +               return ERR_PTR(-E2BIG);

+       obj = i915_gem_object_alloc(); +       if (!obj) +               return ERR_PTR(-ENOMEM);

+       drm_gem_private_object_init(&i915->drm, &obj->base, size); +       i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); +       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;

+       /* +        * Mark the object as volatile, such that the pages are marked as +        * dontneed whilst they are still pinned. As soon as they are unpinned +        * they are allowed to be reaped by the shrinker, and the caller is +        * expected to repopulate - the contents of this object are only valid +        * whilst active and pinned. +        */ +       i915_gem_object_set_volatile(obj);

+       obj->read_domains = I915_GEM_DOMAIN_CPU; +       obj->write_domain = I915_GEM_DOMAIN_CPU;

+       cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; +       i915_gem_object_set_cache_coherency(obj, cache_level);

+       return obj; +}

static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)         if (unlikely(!pd))                 return ERR_PTR(-ENOMEM); -       pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, -                                                       &pd_dummy_obj_ops, -                                                       I915_PDES * SZ_4K); +       pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K);         if (IS_ERR(pd->pt.base)) {                 err = PTR_ERR(pd->pt.base);                 pd->pt.base = NULL;

Robert Beckett

23 May 23 May

3:52 p.m.

New subject: [PATCH 1/4] drm/i915: add gen6 ppgtt dummy creation function

On 11/05/2022 11:13, Thomas Hellström wrote:

...

Hi,

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt

It's not clear from the commit message why we need a special case for this. Could you describe more in detail?

It always was a special case that used the internal backend but provided its own ops, so it actually had no benefit from using the internal backend. See b0b0f2d225da6fe58417fae37e3f797e2db27b62.

I'll add some further explanation in the commit message for v2.

...

Thanks, Thomas

...
Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {         .put_pages = pd_dummy_obj_put_pages, };

+static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ +       static struct lock_class_key lock_class; +       struct drm_i915_gem_object *obj; +       unsigned int cache_level;

+       GEM_BUG_ON(!size); +       GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

+       if (overflows_type(size, obj->base.size)) +               return ERR_PTR(-E2BIG);

+       obj = i915_gem_object_alloc(); +       if (!obj) +               return ERR_PTR(-ENOMEM);

+       drm_gem_private_object_init(&i915->drm, &obj->base, size); +       i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0); +       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;

+       /* +        * Mark the object as volatile, such that the pages are marked as +        * dontneed whilst they are still pinned. As soon as they are unpinned +        * they are allowed to be reaped by the shrinker, and the caller is +        * expected to repopulate - the contents of this object are only valid +        * whilst active and pinned. +        */ +       i915_gem_object_set_volatile(obj);

+       obj->read_domains = I915_GEM_DOMAIN_CPU; +       obj->write_domain = I915_GEM_DOMAIN_CPU;

+       cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; +       i915_gem_object_set_cache_coherency(obj, cache_level);

+       return obj; +}

static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)         if (unlikely(!pd))                 return ERR_PTR(-ENOMEM);

-       pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, -                                                       &pd_dummy_obj_ops, -                                                       I915_PDES * SZ_4K); +       pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K);         if (IS_ERR(pd->pt.base)) {                 err = PTR_ERR(pd->pt.base);                 pd->pt.base = NULL;

Robert Beckett

3 May 3 May

7:13 p.m.

New subject: [PATCH 2/4] drm/i915: setup ggtt scratch page after memory regions

reorder scratch page allocation so that memory regions are available to allocate the buffers

Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 ++++++ drivers/gpu/drm/i915/i915_driver.c | 16 ++++++++++------ 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..5411df1734ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; - u32 pte_flags; - int ret;

GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) }

kref_init(&ggtt->vm.resv_ref); + + return 0; +} + +/** + * i915_ggtt_setup_scratch_page - setup ggtt scratch page + * @i915: i915 device + */ +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + u32 pte_flags; + int ret; + + /* gen5- scratch setup currently happens in @intel_gtt_init */ + if (GRAPHICS_VER(i915) <= 5) + return 0; + ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h index 75ed55c1f30a..c6b79cb78637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915);

/* Stubs for non-x86 platforms */ #else @@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) /* No HW should be enabled for this case yet, return fail */ return -ENODEV; } + +static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + return 0; +} #endif

#endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..f67476b2f349 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_gmch.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h"

@@ -589,12 +590,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)

ret = intel_gt_tiles_init(dev_priv); if (ret) - goto err_mem_regions; + goto err_ggtt; + + ret = i915_ggtt_setup_scratch_page(dev_priv); + if (ret) + goto err_ggtt;

ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(&dev_priv->drm, "failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; }

pci_set_master(pdev); @@ -646,11 +651,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); @@ -896,9 +900,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); - intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); i915_gem_drain_freed_objects(i915); + intel_memory_regions_driver_release(i915); i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); @@ -955,9 +959,9 @@ static void i915_driver_release(struct drm_device *dev)

i915_gem_driver_release(dev_priv);

- intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv);

i915_driver_mmio_release(dev_priv);

-- 2.25.1

Thomas Hellström

11 May 11 May

11:24 a.m.

New subject: [PATCH 2/4] drm/i915: setup ggtt scratch page after memory regions

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...

reorder scratch page allocation so that memory regions are available

Nit: s/reorder/Reorder/

Reviewed-by: Thomas Hellström thomas.hellstrom@linux.intel.com

...

to allocate the buffers

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 ++++++ drivers/gpu/drm/i915/i915_driver.c      | 16 ++++++++++------ 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..5411df1734ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)         struct drm_i915_private *i915 = ggtt->vm.i915;         struct pci_dev *pdev = to_pci_dev(i915->drm.dev);         phys_addr_t phys_addr; -       u32 pte_flags; -       int ret;         GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));         phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)         }         kref_init(&ggtt->vm.resv_ref);

+       return 0; +}

+/** + * i915_ggtt_setup_scratch_page - setup ggtt scratch page + * @i915: i915 device + */

+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ +       struct i915_ggtt *ggtt = to_gt(i915)->ggtt; +       u32 pte_flags; +       int ret;

+       /* gen5- scratch setup currently happens in @intel_gtt_init */ +       if (GRAPHICS_VER(i915) <= 5) +               return 0;

ret = setup_scratch_page(&ggtt->vm);         if (ret) {                 drm_err(&i915->drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h index 75ed55c1f30a..c6b79cb78637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915); /* Stubs for non-x86 platforms */ #else @@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)         /* No HW should be enabled for this case yet, return fail */         return -ENODEV; }

+static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ +       return 0; +} #endif #endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..f67476b2f349 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_gmch.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" @@ -589,12 +590,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)         ret = intel_gt_tiles_init(dev_priv);         if (ret) -               goto err_mem_regions; +               goto err_ggtt;

+       ret = i915_ggtt_setup_scratch_page(dev_priv); +       if (ret) +               goto err_ggtt;         ret = i915_ggtt_enable_hw(dev_priv);         if (ret) {                 drm_err(&dev_priv->drm, "failed to enable GGTT\n"); -               goto err_mem_regions; +               goto err_ggtt;         }         pci_set_master(pdev); @@ -646,11 +651,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi:         if (pdev->msi_enabled)                 pci_disable_msi(pdev); -err_mem_regions: -       intel_memory_regions_driver_release(dev_priv); err_ggtt:         i915_ggtt_driver_release(dev_priv);         i915_gem_drain_freed_objects(dev_priv); +       intel_memory_regions_driver_release(dev_priv);         i915_ggtt_driver_late_release(dev_priv); err_perf:         i915_perf_fini(dev_priv); @@ -896,9 +900,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)         intel_modeset_driver_remove_nogem(i915); out_cleanup_hw:         i915_driver_hw_remove(i915); -       intel_memory_regions_driver_release(i915);         i915_ggtt_driver_release(i915);         i915_gem_drain_freed_objects(i915); +       intel_memory_regions_driver_release(i915);         i915_ggtt_driver_late_release(i915); out_cleanup_mmio:         i915_driver_mmio_release(i915); @@ -955,9 +959,9 @@ static void i915_driver_release(struct drm_device *dev)         i915_gem_driver_release(dev_priv); -       intel_memory_regions_driver_release(dev_priv);         i915_ggtt_driver_release(dev_priv);         i915_gem_drain_freed_objects(dev_priv); +       intel_memory_regions_driver_release(dev_priv);         i915_ggtt_driver_late_release(dev_priv);         i915_driver_mmio_release(dev_priv);

Robert Beckett

3 May 3 May

7:13 p.m.

New subject: [PATCH 3/4] drm/i915: allow volatile buffers to use ttm pool allocator

internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing

Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC;

caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;

-- 2.25.1

Thomas Hellström

11 May 11 May

12:42 p.m.

New subject: [PATCH 3/4] drm/i915: allow volatile buffers to use ttm pool allocator

Hi, Bob,

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...

internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC; caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;

While this is ok, I think it also needs adjustment in the i915_ttm shrink callback. If someone creates a volatile smem object which then hits the shrinker, I think we might hit asserts that it's an is_shmem ttm?

In this case, the shrink callback should just i915_ttm_purge().

/Thomas

Robert Beckett

23 May 23 May

3:53 p.m.

New subject: [PATCH 3/4] drm/i915: allow volatile buffers to use ttm pool allocator

On 11/05/2022 13:42, Thomas Hellström wrote:

...

Hi, Bob,

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...
internal buffers should be shmem backed. if a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..fdb3a1c18cb6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC;

caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true;

While this is ok, I think it also needs adjustment in the i915_ttm shrink callback. If someone creates a volatile smem object which then hits the shrinker, I think we might hit asserts that it's an is_shmem ttm?

In this case, the shrink callback should just i915_ttm_purge().

agreed. nice catch. I'll fix for v2

looks like we could maybe do with some extra shrinker testing too? looks like nothing caught this during CI testing

...

/Thomas

Robert Beckett

3 May 3 May

7:13 p.m.

New subject: [PATCH 4/4] drm/i915: internal buffers use ttm backend

refactor internal buffer backend to allocate volatile pages via ttm pool allocator

Signed-off-by: Robert Beckett bob.beckett@collabora.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++----------- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */

-#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h> - +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -}

-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_place place = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, + }; + struct ttm_placement placement = { + .num_placement = 1, + .placement = &place, + .num_busy_placement = 0, + .busy_placement = NULL, + }; + int ret; + + ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + return ret; } -#endif

- gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; }

-create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; + if (!i915_gem_object_has_pages(obj)) { + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource);

- npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt);

- sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt->table.sgl)); }

- __i915_gem_object_set_pages(obj, st, sg_page_sizes); + GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages)); + i915_ttm_adjust_lru(obj);

return 0; +}

-err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE,

- return -ENOMEM; -} + .get_pages = i915_internal_get_pages, + .put_pages = i915_ttm_put_pages, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, +};

-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

- obj->mm.dirty = false; + mutex_destroy(&obj->ttm.get_io_page.lock);

- __start_cpu_write(obj); -} + if (obj->ttm.created) { + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj);

-static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } else { + __i915_gem_object_fini(obj); + } +}

+/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + int ret;

GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM);

drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, ops, &lock_class, 0); - obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_VOLATILE); + + INIT_LIST_HEAD(&obj->mm.region_link); + + INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock);

- /* - * Mark the object as volatile, such that the pages are marked as - * dontneed whilst they are still pinned. As soon as they are unpinned - * they are allowed to be reaped by the shrinker, and the caller is - * expected to repopulate - the contents of this object are only valid - * whilst active and pinned. - */ - i915_gem_object_set_volatile(obj); + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);

+ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + ttm_bo_type_kernel, i915_ttm_sys_placement(), + 0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + i915_gem_object_free(obj); + return ERR_PTR(ret); + } + + obj->ttm.created = true; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; - + obj->mem_flags &= ~I915_BO_FLAG_IOMEM; + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); + i915_gem_object_unlock(obj);

return obj; }

-/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ -struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, - const struct drm_i915_gem_object_ops *ops, - phys_addr_t size); - #endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void) return &i915_sys_placement; }

-static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) { /* Fastpath */ if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; }

-static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, - struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj, return __i915_ttm_migrate(obj, mr, obj->flags); }

-static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) { /* * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. */ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created);

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo);

/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) + if (bo->destroy != i915_ttm_bo_destroy && + bo->destroy != i915_ttm_internal_bo_destroy) { return NULL; + }

return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res);

void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj);

int i915_ttm_purge(struct drm_i915_gem_object *obj);

@@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) /* Once / if we support GGTT, this is also false for cached ttm_tts */ return mem->mem_type != I915_PL_SYSTEM; } + +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err); + #endif

-- 2.25.1

Thomas Hellström

11 May 11 May

2:14 p.m.

New subject: [PATCH 4/4] drm/i915: internal buffers use ttm backend

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...

Refactor the internal buffer backend to allocate volatile pages via the TTM pool allocator.

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++---------

drivers/gpu/drm/i915/gem/i915_gem_internal.h |   5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c      | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h      | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */ -#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h>

+#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h"

-#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)

-static void internal_free_pages(struct sg_table *st) -{ -       struct scatterlist *sg;

-       for (sg = st->sgl; sg; sg = __sg_next(sg)) { -               if (sg_page(sg)) -                       __free_pages(sg_page(sg), get_order(sg->length));

-       }

-       sg_free_table(st); -       kfree(st); -} -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { -       struct drm_i915_private *i915 = to_i915(obj->base.dev); -       struct sg_table *st; -       struct scatterlist *sg; -       unsigned int sg_page_sizes; -       unsigned int npages; -       int max_order; -       gfp_t gfp;

-       max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB -       if (is_swiotlb_active(obj->base.dev->dev)) { -               unsigned int max_segment;

-               max_segment = swiotlb_max_segment(); -               if (max_segment) { -                       max_segment = max_t(unsigned int, max_segment, -                                           PAGE_SIZE) >> PAGE_SHIFT; -                       max_order = min(max_order, ilog2(max_segment)); -               } +       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); +       struct ttm_operation_ctx ctx = { +               .interruptible = true, +               .no_wait_gpu = false, +       }; +       struct ttm_place place = { +               .fpfn = 0, +               .lpfn = 0, +               .mem_type = I915_PL_SYSTEM, +               .flags = 0, +       }; +       struct ttm_placement placement = { +               .num_placement = 1, +               .placement = &place, +               .num_busy_placement = 0, +               .busy_placement = NULL, +       }; +       int ret;

+       ret = ttm_bo_validate(bo, &placement, &ctx); +       if (ret) { +               ret = i915_ttm_err_to_gem(ret); +               return ret;         } -#endif -       gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; -       if (IS_I965GM(i915) || IS_I965G(i915)) { -               /* 965gm cannot relocate objects above 4GiB. */ -               gfp &= ~__GFP_HIGHMEM; -               gfp |= __GFP_DMA32;

It looks like we're losing this restriction?

There is a flag to ttm_device_init() to make TTM only do __GFP_DMA32 allocations.

...

+       if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { +               ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); +               if (ret) +                       return ret;         } -create_st: -       st = kmalloc(sizeof(*st), GFP_KERNEL); -       if (!st) -               return -ENOMEM; +       if (!i915_gem_object_has_pages(obj)) { +               struct i915_refct_sgt *rsgt = +                       i915_ttm_resource_get_st(obj, bo->resource); -       npages = obj->base.size / PAGE_SIZE; -       if (sg_alloc_table(st, npages, GFP_KERNEL)) { -               kfree(st); -               return -ENOMEM; -       } +               if (IS_ERR(rsgt)) +                       return PTR_ERR(rsgt); -       sg = st->sgl; -       st->nents = 0; -       sg_page_sizes = 0;

-       do { -               int order = min(fls(npages) - 1, max_order); -               struct page *page;

-               do { -                       page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), -                                          order); -                       if (page) -                               break; -                       if (!order--) -                               goto err;

-                       /* Limit subsequent allocations as well */ -                       max_order = order; -               } while (1);

-               sg_set_page(sg, page, PAGE_SIZE << order, 0); -               sg_page_sizes |= PAGE_SIZE << order; -               st->nents++;

-               npages -= 1 << order; -               if (!npages) { -                       sg_mark_end(sg); -                       break; -               }

-               sg = __sg_next(sg); -       } while (1);

-       if (i915_gem_gtt_prepare_pages(obj, st)) { -               /* Failed to dma-map try again with single page sg segments */ -               if (get_order(st->sgl->length)) { -                       internal_free_pages(st); -                       max_order = 0; -                       goto create_st; -               } -               goto err; +               GEM_BUG_ON(obj->mm.rsgt); +               obj->mm.rsgt = rsgt; +               __i915_gem_object_set_pages(obj, &rsgt->table, +                                           i915_sg_dma_sizes(rsgt-

...
table.sgl));

} -       __i915_gem_object_set_pages(obj, st, sg_page_sizes); +       GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages));

+       i915_ttm_adjust_lru(obj);         return 0; +} -err: -       sg_set_page(sg, NULL, 0, 0); -       sg_mark_end(sg); -       internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { +       .name = "i915_gem_object_ttm", +       .flags = I915_GEM_OBJECT_IS_SHRINKABLE, -       return -ENOMEM; -} +       .get_pages = i915_internal_get_pages, +       .put_pages = i915_ttm_put_pages, +       .adjust_lru = i915_ttm_adjust_lru, +       .delayed_free = i915_ttm_delayed_free, +}; -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, -                                              struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { -       i915_gem_gtt_finish_pages(obj, pages); -       internal_free_pages(pages); +       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); -       obj->mm.dirty = false; +       mutex_destroy(&obj->ttm.get_io_page.lock); -       __start_cpu_write(obj); -} +       if (obj->ttm.created) { +               /* This releases all gem object bindings to the backend. */ +               __i915_gem_free_object(obj); -static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { -       .name = "i915_gem_object_internal", -       .flags = I915_GEM_OBJECT_IS_SHRINKABLE, -       .get_pages = i915_gem_object_get_pages_internal, -       .put_pages = i915_gem_object_put_pages_internal, -}; +               call_rcu(&obj->rcu, __i915_gem_free_object_rcu); +       } else { +               __i915_gem_object_fini(obj); +       } +} +/**

+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */

struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, -                                 const struct drm_i915_gem_object_ops *ops, -                                 phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, +                               phys_addr_t size) {         static struct lock_class_key lock_class;         struct drm_i915_gem_object *obj;         unsigned int cache_level; +       struct ttm_operation_ctx ctx = { +               .interruptible = true, +               .no_wait_gpu = false, +       }; +       int ret;         GEM_BUG_ON(!size);         GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915,                 return ERR_PTR(-ENOMEM);         drm_gem_private_object_init(&i915->drm, &obj->base, size); -       i915_gem_object_init(obj, ops, &lock_class, 0); -       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; +       i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, +                            I915_BO_ALLOC_VOLATILE);

+       INIT_LIST_HEAD(&obj->mm.region_link);

+       INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); +       mutex_init(&obj->ttm.get_io_page.lock); -       /* -        * Mark the object as volatile, such that the pages are marked as -        * dontneed whilst they are still pinned. As soon as they are unpinned -        * they are allowed to be reaped by the shrinker, and the caller is -        * expected to repopulate - the contents of this object are only valid -        * whilst active and pinned. -        */ -       i915_gem_object_set_volatile(obj); +       obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); +       ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, +                                  ttm_bo_type_kernel, i915_ttm_sys_placement(), +                                  0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); +       if (ret) { +               ret = i915_ttm_err_to_gem(ret); +               i915_gem_object_free(obj); +               return ERR_PTR(ret); +       }

+       obj->ttm.created = true;         obj->read_domains = I915_GEM_DOMAIN_CPU;         obj->write_domain = I915_GEM_DOMAIN_CPU;

+       obj->mem_flags &= ~I915_BO_FLAG_IOMEM; +       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;         cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;         i915_gem_object_set_cache_coherency(obj, cache_level); +       i915_gem_object_unlock(obj);         return obj; } -/**

- * i915_gem_object_create_internal: create an object with volatile pages
- * @i915: the i915 device
- * @size: the size in bytes of backing storage to allocate for the object
- *
- * Creates a new object that wraps some internal memory for private use.
- * This object is not backed by swappable storage, and as such its contents
- * are volatile and only valid whilst pinned. If the object is reaped by the
- * shrinker, its pages and data will be discarded. Equally, it is not a full
- * GEM object and so not valid for access from userspace. This makes it useful
- * for hardware interfaces like ringbuffers (which are pinned from the time
- * the request is written to the time the hardware stops accessing it), but
- * not for contexts (which need to be preserved when not active for later
- * reuse). Note that it is not cleared upon allocation.
- */

-struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, -                               phys_addr_t size) -{ -       return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);

While we don't have a TTM shmem backend ready yet for internal objects, did you consider setting up yet another region, INTEL_REGION_INTERNAL, with .class = INTEL_MEMORY_SYSTEM and .instance = 1, and making it create a TTM system region on integrated, and use the same region as INTEL_REGION_SMEM on dgfx?

I think ttm should automatically map that to I915_PL_SYSTEM and the backwards mapping in i915_ttm_region() should never get called since the object is never moved.

Then I figure it should suffice to just call __i915_gem_ttm_object_init() and we could drop a lot of code.

/Thomas

...

-} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915,                                 phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, -                                 const struct drm_i915_gem_object_ops *ops, -                                 phys_addr_t size);

#endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void)         return &i915_sys_placement; } -static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) {         /* Fastpath */         if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void)         return &i915_ttm_bo_driver; } -static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, -                               struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, +                        struct ttm_placement *placement) {         struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);         struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj,         return __i915_ttm_migrate(obj, mr, obj->flags); } -static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, -                              struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, +                       struct sg_table *st) {         /*          * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. 
*/ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) {         GEM_BUG_ON(!obj->ttm.created); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo); /** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { -       if (bo->destroy != i915_ttm_bo_destroy) +       if (bo->destroy != i915_ttm_bo_destroy && +           bo->destroy != i915_ttm_internal_bo_destroy) {                 return NULL; +       }         return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,                          struct ttm_resource *res); void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj); int i915_ttm_purge(struct drm_i915_gem_object *obj); @@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)         /* Once / if we support GGTT, this is also false for cached ttm_tts */         return mem->mem_type != I915_PL_SYSTEM; }

+int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, +                        struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err);

#endif

Robert Beckett

23 May 23 May

3:52 p.m.

New subject: [PATCH 4/4] drm/i915: internal buffers use ttm backend

On 11/05/2022 15:14, Thomas Hellström wrote:

...

On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote:

...
Refactor the internal buffer backend to allocate volatile pages via the TTM pool allocator.

Signed-off-by: Robert Beckett bob.beckett@collabora.com

drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++---------

drivers/gpu/drm/i915/gem/i915_gem_internal.h |   5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.c      | 12 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h      | 12 +- 4 files changed, 125 insertions(+), 168 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..815ec9466cc0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,156 +4,119 @@ * Copyright © 2014-2016 Intel Corporation */

-#include <linux/scatterlist.h> -#include <linux/slab.h> -#include <linux/swiotlb.h>

+#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include "drm/ttm/ttm_bo_api.h" +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h"

-#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)

-static void internal_free_pages(struct sg_table *st) -{ -       struct scatterlist *sg;

-       for (sg = st->sgl; sg; sg = __sg_next(sg)) { -               if (sg_page(sg)) -                       __free_pages(sg_page(sg), get_order(sg->length));

-       }

-       sg_free_table(st); -       kfree(st); -}

-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) { -       struct drm_i915_private *i915 = to_i915(obj->base.dev); -       struct sg_table *st; -       struct scatterlist *sg; -       unsigned int sg_page_sizes; -       unsigned int npages; -       int max_order; -       gfp_t gfp;

-       max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB -       if (is_swiotlb_active(obj->base.dev->dev)) { -               unsigned int max_segment;

-               max_segment = swiotlb_max_segment(); -               if (max_segment) { -                       max_segment = max_t(unsigned int, max_segment, -                                           PAGE_SIZE) >> PAGE_SHIFT; -                       max_order = min(max_order, ilog2(max_segment)); -               } +       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); +       struct ttm_operation_ctx ctx = { +               .interruptible = true, +               .no_wait_gpu = false, +       }; +       struct ttm_place place = { +               .fpfn = 0, +               .lpfn = 0, +               .mem_type = I915_PL_SYSTEM, +               .flags = 0, +       }; +       struct ttm_placement placement = { +               .num_placement = 1, +               .placement = &place, +               .num_busy_placement = 0, +               .busy_placement = NULL, +       }; +       int ret;

+       ret = ttm_bo_validate(bo, &placement, &ctx); +       if (ret) { +               ret = i915_ttm_err_to_gem(ret); +               return ret;         } -#endif

-       gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; -       if (IS_I965GM(i915) || IS_I965G(i915)) { -               /* 965gm cannot relocate objects above 4GiB. */ -               gfp &= ~__GFP_HIGHMEM; -               gfp |= __GFP_DMA32;

It looks like we're losing this restriction?

There is a flag to ttm_device_init() to make TTM only do __GFP_DMA32 allocations.

Agreed. Will fix for v2.

...

...
+       if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { +               ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); +               if (ret) +                       return ret;         }

-create_st: -       st = kmalloc(sizeof(*st), GFP_KERNEL); -       if (!st) -               return -ENOMEM; +       if (!i915_gem_object_has_pages(obj)) { +               struct i915_refct_sgt *rsgt = +                       i915_ttm_resource_get_st(obj, bo->resource);

-       npages = obj->base.size / PAGE_SIZE; -       if (sg_alloc_table(st, npages, GFP_KERNEL)) { -               kfree(st); -               return -ENOMEM; -       } +               if (IS_ERR(rsgt)) +                       return PTR_ERR(rsgt);

-       sg = st->sgl; -       st->nents = 0; -       sg_page_sizes = 0;

-       do { -               int order = min(fls(npages) - 1, max_order); -               struct page *page;

-               do { -                       page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), -                                          order); -                       if (page) -                               break; -                       if (!order--) -                               goto err;

-                       /* Limit subsequent allocations as well */ -                       max_order = order; -               } while (1);

-               sg_set_page(sg, page, PAGE_SIZE << order, 0); -               sg_page_sizes |= PAGE_SIZE << order; -               st->nents++;

-               npages -= 1 << order; -               if (!npages) { -                       sg_mark_end(sg); -                       break; -               }

-               sg = __sg_next(sg); -       } while (1);

-       if (i915_gem_gtt_prepare_pages(obj, st)) { -               /* Failed to dma-map try again with single page sg segments */ -               if (get_order(st->sgl->length)) { -                       internal_free_pages(st); -                       max_order = 0; -                       goto create_st; -               } -               goto err; +               GEM_BUG_ON(obj->mm.rsgt); +               obj->mm.rsgt = rsgt; +               __i915_gem_object_set_pages(obj, &rsgt->table, +                                           i915_sg_dma_sizes(rsgt->table.sgl));

}

-       __i915_gem_object_set_pages(obj, st, sg_page_sizes); +       GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages));

+       i915_ttm_adjust_lru(obj);

return 0; +}

-err: -       sg_set_page(sg, NULL, 0, 0); -       sg_mark_end(sg); -       internal_free_pages(st); +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { +       .name = "i915_gem_object_ttm", +       .flags = I915_GEM_OBJECT_IS_SHRINKABLE,

-       return -ENOMEM; -} +       .get_pages = i915_internal_get_pages, +       .put_pages = i915_ttm_put_pages, +       .adjust_lru = i915_ttm_adjust_lru, +       .delayed_free = i915_ttm_delayed_free, +};

-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, -                                              struct sg_table *pages) +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) { -       i915_gem_gtt_finish_pages(obj, pages); -       internal_free_pages(pages); +       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

-       obj->mm.dirty = false; +       mutex_destroy(&obj->ttm.get_io_page.lock);

-       __start_cpu_write(obj); -} +       if (obj->ttm.created) { +               /* This releases all gem object bindings to the backend. */ +               __i915_gem_free_object(obj);

-static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { -       .name = "i915_gem_object_internal", -       .flags = I915_GEM_OBJECT_IS_SHRINKABLE, -       .get_pages = i915_gem_object_get_pages_internal, -       .put_pages = i915_gem_object_put_pages_internal, -}; +               call_rcu(&obj->rcu, __i915_gem_free_object_rcu); +       } else { +               __i915_gem_object_fini(obj); +       } +}

+/**

+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */

struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, -                                 const struct drm_i915_gem_object_ops *ops, -                                 phys_addr_t size) +i915_gem_object_create_internal(struct drm_i915_private *i915, +                               phys_addr_t size) {         static struct lock_class_key lock_class;         struct drm_i915_gem_object *obj;         unsigned int cache_level; +       struct ttm_operation_ctx ctx = { +               .interruptible = true, +               .no_wait_gpu = false, +       }; +       int ret;

GEM_BUG_ON(!size);         GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct drm_i915_private *i915,                 return ERR_PTR(-ENOMEM);

drm_gem_private_object_init(&i915->drm, &obj->base, size); -       i915_gem_object_init(obj, ops, &lock_class, 0); -       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; +       i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, +                            I915_BO_ALLOC_VOLATILE);

+       INIT_LIST_HEAD(&obj->mm.region_link);

+       INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); +       mutex_init(&obj->ttm.get_io_page.lock);

-       /* -        * Mark the object as volatile, such that the pages are marked as -        * dontneed whilst they are still pinned. As soon as they are unpinned -        * they are allowed to be reaped by the shrinker, and the caller is -        * expected to repopulate - the contents of this object are only valid -        * whilst active and pinned. -        */ -       i915_gem_object_set_volatile(obj); +       obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);

+       ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, +                                  ttm_bo_type_kernel, i915_ttm_sys_placement(), +                                  0, &ctx, NULL, NULL, i915_ttm_internal_bo_destroy); +       if (ret) { +               ret = i915_ttm_err_to_gem(ret); +               i915_gem_object_free(obj); +               return ERR_PTR(ret); +       }

+       obj->ttm.created = true;         obj->read_domains = I915_GEM_DOMAIN_CPU;         obj->write_domain = I915_GEM_DOMAIN_CPU;

+       obj->mem_flags &= ~I915_BO_FLAG_IOMEM; +       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;         cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;         i915_gem_object_set_cache_coherency(obj, cache_level); +       i915_gem_object_unlock(obj);

return obj; }

-/**

i915_gem_object_create_internal: create an object with volatile

pages

@i915: the i915 device

@size: the size in bytes of backing storage to allocate for the

object

Creates a new object that wraps some internal memory for private

use.

This object is not backed by swappable storage, and as such its

contents

are volatile and only valid whilst pinned. If the object is

reaped by the

shrinker, its pages and data will be discarded. Equally, it is

not a full

GEM object and so not valid for access from userspace. This makes

it useful

for hardware interfaces like ringbuffers (which are pinned from

the time

the request is written to the time the hardware stops accessing

it), but

not for contexts (which need to be preserved when not active for

later

reuse). Note that it is not cleared upon allocation.

*/

-struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, -                               phys_addr_t size) -{ -       return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);

While we don't have a TTM shmem backend ready yet for internal,

Did you consider setting up just yet another region, INTEL_REGION_INTERNAL, .class = INTEL_MEMORY_SYSTEM and .instance = 1,

And make it create a TTM system region on integrated, and use same region as INTEL_REGION_SMEM on dgfx.

I think ttm should automatically map that to I915_PL_SYSTEM and the backwards mapping in i915_ttm_region() should never get called since the object is never moved.

Then I figure it should suffice to just call __i915_gem_ttm_object_init() and we could drop a lot of code.

I briefly considered using a new fake region, but with the current precedent of mapping memory regions to real segmented memory areas, I considered it an abuse of the semantics of memory regions.

If we are happy to have fake regions, I can revert it back to a previous design of using the system region for discrete and add a fake region setup for integrated.

Would this be preferred over the current design?

...

/Thomas

...
-} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h index 6664e06112fc..524e1042b20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h @@ -15,9 +15,4 @@ struct drm_i915_private; struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915,                                 phys_addr_t size); -struct drm_i915_gem_object * -__i915_gem_object_create_internal(struct drm_i915_private *i915, -                                 const struct drm_i915_gem_object_ops *ops, -                                 phys_addr_t size);

#endif /* __I915_GEM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fdb3a1c18cb6..92195ead8c11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void)         return &i915_sys_placement; }

-static int i915_ttm_err_to_gem(int err) +int i915_ttm_err_to_gem(int err) {         /* Fastpath */         if (likely(!err)) @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void)         return &i915_ttm_bo_driver; }

-static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, -                               struct ttm_placement *placement) +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, +                        struct ttm_placement *placement) {         struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);         struct ttm_operation_ctx ctx = { @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj,         return __i915_ttm_migrate(obj, mr, obj->flags); }

-static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, -                              struct sg_table *st) +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, +                       struct sg_table *st) {         /*          * We're currently not called from a shrinker, so put_pages() @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) * it's not idle, and using the TTM destroyed list handling could help us * benefit from that. */ -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) {         GEM_BUG_ON(!obj->ttm.created);

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..06701c46d8e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) * i915 ttm gem object destructor. Internal use only. */ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo);

/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { -       if (bo->destroy != i915_ttm_bo_destroy) +       if (bo->destroy != i915_ttm_bo_destroy && +           bo->destroy != i915_ttm_internal_bo_destroy) {                 return NULL; +       }

return container_of(bo, struct drm_i915_gem_object, __do_not_access); } @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,                          struct ttm_resource *res);

void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj);

int i915_ttm_purge(struct drm_i915_gem_object *obj);

@@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)         /* Once / if we support GGTT, this is also false for cached ttm_tts */         return mem->mem_type != I915_PL_SYSTEM; }

+int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, +                        struct ttm_placement *placement); +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st); +int i915_ttm_err_to_gem(int err);

#endif

1074

Age (days ago)

1094

Last active (days ago)

dri-devel@lists.freedesktop.org

11 comments

2 participants

tags (0)

participants (2)

Robert Beckett
Thomas Hellström