As far as I can tell the buffer_count was never used by a userspace application.
The number of BOs in the system is far better suited in debugfs than sysfs and we now should be able to add other information here as well.
v2: add that additionally to sysfs
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/ttm/ttm_bo.c | 48 ++------------------------------ drivers/gpu/drm/ttm/ttm_module.c | 4 +++ drivers/gpu/drm/ttm/ttm_module.h | 6 ++-- 3 files changed, 11 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 31e8b3da5563..cd55e3104e50 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -44,8 +44,6 @@
#include "ttm_module.h"
-static void ttm_bo_global_kobj_release(struct kobject *kobj); - /* * ttm_global_mutex - protecting the global BO state */ @@ -54,11 +52,6 @@ unsigned ttm_bo_glob_use_count; struct ttm_bo_global ttm_bo_glob; EXPORT_SYMBOL(ttm_bo_glob);
-static struct attribute ttm_bo_count = { - .name = "bo_count", - .mode = S_IRUGO -}; - /* default destructor */ static void ttm_bo_default_destroy(struct ttm_buffer_object *bo) { @@ -84,32 +77,6 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, } }
-static ssize_t ttm_bo_global_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) -{ - struct ttm_bo_global *glob = - container_of(kobj, struct ttm_bo_global, kobj); - - return snprintf(buffer, PAGE_SIZE, "%d\n", - atomic_read(&glob->bo_count)); -} - -static struct attribute *ttm_bo_global_attrs[] = { - &ttm_bo_count, - NULL -}; - -static const struct sysfs_ops ttm_bo_global_ops = { - .show = &ttm_bo_global_show -}; - -static struct kobj_type ttm_bo_glob_kobj_type = { - .release = &ttm_bo_global_kobj_release, - .sysfs_ops = &ttm_bo_global_ops, - .default_attrs = ttm_bo_global_attrs -}; - static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; @@ -1226,14 +1193,6 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, } EXPORT_SYMBOL(ttm_bo_dma_acc_size);
-static void ttm_bo_global_kobj_release(struct kobject *kobj) -{ - struct ttm_bo_global *glob = - container_of(kobj, struct ttm_bo_global, kobj); - - __free_page(glob->dummy_read_page); -} - static void ttm_bo_global_release(void) { struct ttm_bo_global *glob = &ttm_bo_glob; @@ -1245,6 +1204,7 @@ static void ttm_bo_global_release(void) kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob); + __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob)); out: mutex_unlock(&ttm_global_mutex); @@ -1277,10 +1237,8 @@ static int ttm_bo_global_init(void) INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);
- ret = kobject_init_and_add( - &glob->kobj, &ttm_bo_glob_kobj_type, ttm_get_kobj(), "buffer_objects"); - if (unlikely(ret != 0)) - kobject_put(&glob->kobj); + debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, + &glob->bo_count); out: mutex_unlock(&ttm_global_mutex); return ret; diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index c0906437cb1c..f6566603a60f 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -32,12 +32,14 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/sched.h> +#include <linux/debugfs.h> #include <drm/drm_sysfs.h>
#include "ttm_module.h"
static DECLARE_WAIT_QUEUE_HEAD(exit_q); static atomic_t device_released; +struct dentry *ttm_debugfs_root;
static struct device_type ttm_drm_class_type = { .name = "ttm", @@ -77,6 +79,7 @@ static int __init ttm_init(void) if (unlikely(ret != 0)) goto out_no_dev_reg;
+ ttm_debugfs_root = debugfs_create_dir("ttm", NULL); return 0; out_no_dev_reg: atomic_set(&device_released, 1); @@ -94,6 +97,7 @@ static void __exit ttm_exit(void) */
wait_event(exit_q, atomic_read(&device_released) == 1); + debugfs_remove(ttm_debugfs_root); }
module_init(ttm_init); diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h index 45fa318c1585..2f03c2fcf570 100644 --- a/drivers/gpu/drm/ttm/ttm_module.h +++ b/drivers/gpu/drm/ttm/ttm_module.h @@ -31,10 +31,12 @@ #ifndef _TTM_MODULE_H_ #define _TTM_MODULE_H_
-#include <linux/kernel.h> +#define TTM_PFX "[TTM] " + struct kobject; +struct dentry;
-#define TTM_PFX "[TTM] " extern struct kobject *ttm_get_kobj(void); +extern struct dentry *ttm_debugfs_root;
#endif /* _TTM_MODULE_H_ */
Instead of printing this on the per device pool.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_pool.c | 70 ++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 7b2f60616750..1d61e8fc0e81 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -42,6 +42,8 @@ #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_tt.h>
+#include "ttm_module.h" + /** * struct ttm_pool_dma - Helper object for coherent DMA mappings * @@ -543,6 +545,17 @@ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) return count; }
+/* Print a nice header for the order */ +static void ttm_pool_debugfs_header(struct seq_file *m) +{ + unsigned int i; + + seq_puts(m, "\t "); + for (i = 0; i < MAX_ORDER; ++i) + seq_printf(m, " ---%2u---", i); + seq_puts(m, "\n"); +} + /* Dump information about the different pool types */ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, struct seq_file *m) @@ -554,6 +567,35 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, seq_puts(m, "\n"); }
+/* Dump the total amount of allocated pages */ +static void ttm_pool_debugfs_footer(struct seq_file *m) +{ + seq_printf(m, "\ntotal\t: %8lu of %8lu\n", + atomic_long_read(&allocated_pages), page_pool_size); +} + +/* Dump the information for the global pools */ +static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data) +{ + ttm_pool_debugfs_header(m); + + spin_lock(&shrinker_lock); + seq_puts(m, "wc\t:"); + ttm_pool_debugfs_orders(global_write_combined, m); + seq_puts(m, "uc\t:"); + ttm_pool_debugfs_orders(global_uncached, m); + seq_puts(m, "wc 32\t:"); + ttm_pool_debugfs_orders(global_dma32_write_combined, m); + seq_puts(m, "uc 32\t:"); + ttm_pool_debugfs_orders(global_dma32_uncached, m); + spin_unlock(&shrinker_lock); + + ttm_pool_debugfs_footer(m); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals); + /** * ttm_pool_debugfs - Debugfs dump function for a pool * @@ -566,23 +608,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
- spin_lock(&shrinker_lock); - - seq_puts(m, "\t "); - for (i = 0; i < MAX_ORDER; ++i) - seq_printf(m, " ---%2u---", i); - seq_puts(m, "\n"); - - seq_puts(m, "wc\t:"); - ttm_pool_debugfs_orders(global_write_combined, m); - seq_puts(m, "uc\t:"); - ttm_pool_debugfs_orders(global_uncached, m); - - seq_puts(m, "wc 32\t:"); - ttm_pool_debugfs_orders(global_dma32_write_combined, m); - seq_puts(m, "uc 32\t:"); - ttm_pool_debugfs_orders(global_dma32_uncached, m); + ttm_pool_debugfs_header(m);
+ spin_lock(&shrinker_lock); for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { seq_puts(m, "DMA "); switch (i) { @@ -598,12 +626,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) } ttm_pool_debugfs_orders(pool->caching[i].orders, m); } - - seq_printf(m, "\ntotal\t: %8lu of %8lu\n", - atomic_long_read(&allocated_pages), page_pool_size); - spin_unlock(&shrinker_lock);
+ ttm_pool_debugfs_footer(m); return 0; } EXPORT_SYMBOL(ttm_pool_debugfs); @@ -660,6 +685,11 @@ int ttm_pool_mgr_init(unsigned long num_pages) ttm_uncached, i); }
+#ifdef CONFIG_DEBUG_FS + debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL, + &ttm_pool_debugfs_globals_fops); +#endif + mm_shrinker.count_objects = ttm_pool_shrinker_count; mm_shrinker.scan_objects = ttm_pool_shrinker_scan; mm_shrinker.seeks = 1;
On Fri, Dec 18, 2020 at 06:55:36PM +0100, Christian König wrote:
Instead of printing this on the per device pool.
Signed-off-by: Christian König christian.koenig@amd.com
Hm shouldn't we remove this from radeon/amdgpu too and drop the EXPORT_SYMBOL for ttm_pool_debugfs? I'm kinda confused why we still have both ... -Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 70 ++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 7b2f60616750..1d61e8fc0e81 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -42,6 +42,8 @@ #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_tt.h>
+#include "ttm_module.h"
/**
- struct ttm_pool_dma - Helper object for coherent DMA mappings
@@ -543,6 +545,17 @@ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) return count; }
+/* Print a nice header for the order */ +static void ttm_pool_debugfs_header(struct seq_file *m) +{
- unsigned int i;
- seq_puts(m, "\t ");
- for (i = 0; i < MAX_ORDER; ++i)
seq_printf(m, " ---%2u---", i);
- seq_puts(m, "\n");
+}
/* Dump information about the different pool types */ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, struct seq_file *m) @@ -554,6 +567,35 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, seq_puts(m, "\n"); }
+/* Dump the total amount of allocated pages */ +static void ttm_pool_debugfs_footer(struct seq_file *m) +{
- seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
atomic_long_read(&allocated_pages), page_pool_size);
+}
+/* Dump the information for the global pools */ +static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data) +{
- ttm_pool_debugfs_header(m);
- spin_lock(&shrinker_lock);
- seq_puts(m, "wc\t:");
- ttm_pool_debugfs_orders(global_write_combined, m);
- seq_puts(m, "uc\t:");
- ttm_pool_debugfs_orders(global_uncached, m);
- seq_puts(m, "wc 32\t:");
- ttm_pool_debugfs_orders(global_dma32_write_combined, m);
- seq_puts(m, "uc 32\t:");
- ttm_pool_debugfs_orders(global_dma32_uncached, m);
- spin_unlock(&shrinker_lock);
- ttm_pool_debugfs_footer(m);
- return 0;
+} +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals);
/**
- ttm_pool_debugfs - Debugfs dump function for a pool
@@ -566,23 +608,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
- spin_lock(&shrinker_lock);
- seq_puts(m, "\t ");
- for (i = 0; i < MAX_ORDER; ++i)
seq_printf(m, " ---%2u---", i);
- seq_puts(m, "\n");
- seq_puts(m, "wc\t:");
- ttm_pool_debugfs_orders(global_write_combined, m);
- seq_puts(m, "uc\t:");
- ttm_pool_debugfs_orders(global_uncached, m);
- seq_puts(m, "wc 32\t:");
- ttm_pool_debugfs_orders(global_dma32_write_combined, m);
- seq_puts(m, "uc 32\t:");
- ttm_pool_debugfs_orders(global_dma32_uncached, m);
ttm_pool_debugfs_header(m);
spin_lock(&shrinker_lock); for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { seq_puts(m, "DMA "); switch (i) {
@@ -598,12 +626,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) } ttm_pool_debugfs_orders(pool->caching[i].orders, m); }
- seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
atomic_long_read(&allocated_pages), page_pool_size);
- spin_unlock(&shrinker_lock);
- ttm_pool_debugfs_footer(m); return 0;
} EXPORT_SYMBOL(ttm_pool_debugfs); @@ -660,6 +685,11 @@ int ttm_pool_mgr_init(unsigned long num_pages) ttm_uncached, i); }
+#ifdef CONFIG_DEBUG_FS
- debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
&ttm_pool_debugfs_globals_fops);
+#endif
- mm_shrinker.count_objects = ttm_pool_shrinker_count; mm_shrinker.scan_objects = ttm_pool_shrinker_scan; mm_shrinker.seeks = 1;
-- 2.25.1
Useful for testing.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_pool.c | 50 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1d61e8fc0e81..1cdacd58753a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -529,6 +529,28 @@ void ttm_pool_fini(struct ttm_pool *pool) } EXPORT_SYMBOL(ttm_pool_fini);
+/* As long as pages are available make sure to release at least one */ +static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long num_freed = 0; + + do + num_freed += ttm_pool_shrink(); + while (!num_freed && atomic_long_read(&allocated_pages)); + + return num_freed; +} + +/* Return the number of pages available or SHRINK_EMPTY if we have none */ +static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long num_pages = atomic_long_read(&allocated_pages); + + return num_pages ? num_pages : SHRINK_EMPTY; +} + #ifdef CONFIG_DEBUG_FS /* Count the number of pages available in a pool_type */ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) @@ -633,29 +655,19 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) } EXPORT_SYMBOL(ttm_pool_debugfs);
-#endif - -/* As long as pages are available make sure to release at least one */ -static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink, - struct shrink_control *sc) +/* Test the shrinker functions and dump the result */ +static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) { - unsigned long num_freed = 0; + struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
- do - num_freed += ttm_pool_shrink(); - while (!num_freed && atomic_long_read(&allocated_pages)); + seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc), + ttm_pool_shrinker_scan(&mm_shrinker, &sc));
- return num_freed; + return 0; } +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
-/* Return the number of pages available or SHRINK_EMPTY if we have none */ -static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink, - struct shrink_control *sc) -{ - unsigned long num_pages = atomic_long_read(&allocated_pages); - - return num_pages ? num_pages : SHRINK_EMPTY; -} +#endif
/** * ttm_pool_mgr_init - Initialize globals @@ -688,6 +700,8 @@ int ttm_pool_mgr_init(unsigned long num_pages) #ifdef CONFIG_DEBUG_FS debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL, &ttm_pool_debugfs_globals_fops); + debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL, + &ttm_pool_debugfs_shrink_fops); #endif
mm_shrinker.count_objects = ttm_pool_shrinker_count;
On Fri, Dec 18, 2020 at 06:55:37PM +0100, Christian König wrote:
Useful for testing.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_pool.c | 50 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1d61e8fc0e81..1cdacd58753a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -529,6 +529,28 @@ void ttm_pool_fini(struct ttm_pool *pool) } EXPORT_SYMBOL(ttm_pool_fini);
+/* As long as pages are available make sure to release at least one */ +static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
struct shrink_control *sc)
+{
- unsigned long num_freed = 0;
- do
num_freed += ttm_pool_shrink();
- while (!num_freed && atomic_long_read(&allocated_pages));
- return num_freed;
+}
+/* Return the number of pages available or SHRINK_EMPTY if we have none */ +static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
struct shrink_control *sc)
+{
- unsigned long num_pages = atomic_long_read(&allocated_pages);
- return num_pages ? num_pages : SHRINK_EMPTY;
+}
#ifdef CONFIG_DEBUG_FS /* Count the number of pages available in a pool_type */ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) @@ -633,29 +655,19 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) } EXPORT_SYMBOL(ttm_pool_debugfs);
-#endif
-/* As long as pages are available make sure to release at least one */ -static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
struct shrink_control *sc)
+/* Test the shrinker functions and dump the result */ +static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) {
- unsigned long num_freed = 0;
- struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
- do
num_freed += ttm_pool_shrink();
- while (!num_freed && atomic_long_read(&allocated_pages));
Need to wrap this in fs_reclaim_acquire/release(), otherwise lockdep won't connect the dots and the dedicated testing interface here isn't very useful.
Also this is a bit a confusing interface, I'd expect the show function to do the counting, and then the write function would try to shrink as many objects as requested with the _scan function.
Both should be annotated with fs_reclaim_acquire/release.
- seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc),
ttm_pool_shrinker_scan(&mm_shrinker, &sc));
- return num_freed;
- return 0;
} +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
-/* Return the number of pages available or SHRINK_EMPTY if we have none */ -static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
struct shrink_control *sc)
-{
- unsigned long num_pages = atomic_long_read(&allocated_pages);
- return num_pages ? num_pages : SHRINK_EMPTY;
-} +#endif
/**
- ttm_pool_mgr_init - Initialize globals
@@ -688,6 +700,8 @@ int ttm_pool_mgr_init(unsigned long num_pages) #ifdef CONFIG_DEBUG_FS debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL, &ttm_pool_debugfs_globals_fops);
- debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
&ttm_pool_debugfs_shrink_fops);
I think an interface per pool would make more sense for dedicated testing. I'm kinda wondering whether we shouldn't do this in mm/ for all shrinkers by default, it's really quite neat for being able to test stuff. -Daniel
#endif
mm_shrinker.count_objects = ttm_pool_shrinker_count;
2.25.1
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) - ttm_pool_type_init(&pool->caching[i].orders[j], - pool, i, j); + if (use_dma_alloc) { + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) + for (j = 0; j < MAX_ORDER; ++j) + ttm_pool_type_init(&pool->caching[i].orders[j], + pool, i, j); + } } EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) - ttm_pool_type_fini(&pool->caching[i].orders[j]); + if (pool->use_dma_alloc) { + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) + for (j = 0; j < MAX_ORDER; ++j) + ttm_pool_type_fini(&pool->caching[i].orders[j]); + } } EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
+ if (!pool->use_dma_alloc) { + seq_puts(m, "unused\n"); + return 0; + } + ttm_pool_debugfs_header(m);
spin_lock(&shrinker_lock);
On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com
Ah, just realized the answer to my question on patch 2: The pools are per-device, due to dma_alloc_coherent being per-device (but really mostly it isn't, but that's what we have to deal with fighting the dma-api abstraction).
I think this would make a lot more sense if the shrinkers are per-pool (and also most of the debugfs files), since as-is in a multi-gpu system the first gpu's pool gets preferentially thrashed. Which isn't a nice design. Splitting that into per gpu shrinkers means we get equal shrinking without having to maintain a global lru. This is how xfs seems to set up their shrinkers, and in general xfs people have a solid understanding of this stuff.
Aside: I think it also would make tons of sense to split up your new ttm bo shrinker up into a per-device lru, and throw the global system memory lru out the window completely :-) Assuming we can indeed get rid of it, and vmwgfx doesn't need it somewhere still.
Aside from this lgtm, but I guess will change a bit with that shuffling. -Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- }
} EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- }
} EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
if (!pool->use_dma_alloc) {
seq_puts(m, "unused\n");
return 0;
}
ttm_pool_debugfs_header(m);
spin_lock(&shrinker_lock);
-- 2.25.1
Am 22.12.20 um 14:51 schrieb Daniel Vetter:
On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com
Ah, just realized the answer to my question on patch 2: The pools are per-device, due to dma_alloc_coherent being per-device (but really mostly it isn't, but that's what we have to deal with fighting the dma-api abstraction).
I think this would make a lot more sense if the shrinkers are per-pool (and also most of the debugfs files), since as-is in a multi-gpu system the first gpu's pool gets preferrentially thrashed. Which isn't a nice design. Splitting that into per gpu shrinkers means we get equal shrinking without having to maintain a global lru. This is how xfs seems to set up their shrinkers, and in general xfs people have a solid understanding of this stuff.
Well fairness and not trashing the first GPUs pool is the reason why I implemented just one shrinker plus a global LRU.
In other words shrink_slab() just uses list_for_each_entry() on all shrinkers.
In the pool shrinker callback shrink one pool and move it to the end of the shrinker list.
Aside: I think it also would make tons of sense to split up your new ttm bo shrinker up into a per-device lru, and throw the global system memory lru out the window completely :-) Assuming we can indeed get rid of it, and vmwgfx doesn't need it somewhere still.
Yeah, I already have that as a patch set here, but I have this dependent on a larger rename of the device structures.
Aside from this lgtm, but I guess will change a bit with that shuffling.
Thanks for the review, going to send out a new version with the fs_reclaim_acquire/release added in a minute.
Christian.
-Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- } } EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- } } EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
if (!pool->use_dma_alloc) {
seq_puts(m, "unused\n");
return 0;
}
ttm_pool_debugfs_header(m);
spin_lock(&shrinker_lock);
-- 2.25.1
On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
Am 22.12.20 um 14:51 schrieb Daniel Vetter:
On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com
Ah, just realized the answer to my question on patch 2: The pools are per-device, due to dma_alloc_coherent being per-device (but really mostly it isn't, but that's what we have to deal with fighting the dma-api abstraction).
I think this would make a lot more sense if the shrinkers are per-pool (and also most of the debugfs files), since as-is in a multi-gpu system the first gpu's pool gets preferrentially thrashed. Which isn't a nice design. Splitting that into per gpu shrinkers means we get equal shrinking without having to maintain a global lru. This is how xfs seems to set up their shrinkers, and in general xfs people have a solid understanding of this stuff.
Well fairness and not trashing the first GPUs pool is the reason why I implemented just one shrinker plus a global LRU.
That's kinda defeating the point of how the core mm works. At least of how I'm understanding how it works. Imo we shouldn't try to re-implement this kind of balancing across different pools in our callback, since core mm tries pretty hard to equally shrink already (it only shrinks each shrinker a little bit each round, but does a lot of rounds under memory pressure).
Also maintaining your own global lru means global locking for the usual case of none-to-little memory contention, unnecessarily wasting the fast path.
In other words shrink_slab() just uses list_for_each_entry() on all shrinkers.
In the pool shrinker callback shrink one pool and move it to the end of the shrinker list.
Aside: I think it also would make tons of sense to split up your new ttm bo shrinker up into a per-device lru, and throw the global system memory lru out the window completely :-) Assuming we can indeed get rid of it, and vmwgfx doesn't need it somewhere still.
Yeah, I already have that as a patch set here, but I have this dependent on a larger rename of the device structures.
Hm maybe include that in the next round, just for the bigger picture? Don't have to merge it all in one go, just want to make sure we agree on where we're going.
Aside from this lgtm, but I guess will change a bit with that shuffling.
Thanks for the review, going to send out a new version with the fs_reclaim_acquire/release added in a minute.
Cool.
Cheers, Daniel
Christian.
-Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- } } EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- } } EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
- if (!pool->use_dma_alloc) {
seq_puts(m, "unused\n");
return 0;
- }
- ttm_pool_debugfs_header(m); spin_lock(&shrinker_lock);
-- 2.25.1
Am 07.01.21 um 17:38 schrieb Daniel Vetter:
On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
Am 22.12.20 um 14:51 schrieb Daniel Vetter:
On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com
Ah, just realized the answer to my question on patch 2: The pools are per-device, due to dma_alloc_coherent being per-device (but really mostly it isn't, but that's what we have to deal with fighting the dma-api abstraction).
I think this would make a lot more sense if the shrinkers are per-pool (and also most of the debugfs files), since as-is in a multi-gpu system the first gpu's pool gets preferrentially thrashed. Which isn't a nice design. Splitting that into per gpu shrinkers means we get equal shrinking without having to maintain a global lru. This is how xfs seems to set up their shrinkers, and in general xfs people have a solid understanding of this stuff.
Well fairness and not trashing the first GPUs pool is the reason why I implemented just one shrinker plus a global LRU.
That's kinda defeating the point of how the core mm works. At least of how I'm understanding how it works. Imo we shouldn't try to re-implement this kind of balancing across different pools in our callback, since core mm tries pretty hard to equally shrink already (it only shrinks each shrinker a little bit each round, but does a lot of rounds under memory pressure).
Correct, see the problem is that we don't want to shrink from each pool on each round.
E.g. we have something like 48 global pools and 36 for each device which needs a DMA coherent pool.
On each round we want to shrink only one cached item from one pool and not 48.
Also maintaining your own global lru means global locking for the usual case of none-to-little memory contention, unecessarily wasting the fast path.
No, the fast path doesn't need to take the global LRU lock.
I've optimized this quite a bit by looking into the pools only once for each power of two.
In other words shrink_slab() just uses list_for_each_entry() on all shrinkers.
In the pool shrinker callback shrink one pool and move it to the end of the shrinker list.
Aside: I think it also would make tons of sense to split up your new ttm bo shrinker up into a per-device lru, and throw the global system memory lru out the window completely :-) Assuming we can indeed get rid of it, and vmwgfx doesn't need it somewhere still.
Yeah, I already have that as a patch set here, but I have this dependent on a larger rename of the device structures.
Hm maybe include that in the next round, just for the bigger picture? Don't have to merge it all in one go, just want to make sure we agree on where we're going.
I need to clean this set up quite a bit. Let's push this one here upstream first.
Aside from this lgtm, but I guess will change a bit with that shuffling.
Thanks for the review, going to send out a new version with the fs_reclaim_acquire/release added in a minute.
Cool.
Cheers, Daniel
Got distracted by bug fixes in the last two weeks, but really going to send that out now :)
Christian.
Christian.
-Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- } } EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- } } EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
- if (!pool->use_dma_alloc) {
seq_puts(m, "unused\n");
return 0;
- }
- ttm_pool_debugfs_header(m); spin_lock(&shrinker_lock);
-- 2.25.1
On Tue, Jan 19, 2021 at 01:11:36PM +0100, Christian König wrote:
Am 07.01.21 um 17:38 schrieb Daniel Vetter:
On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
Am 22.12.20 um 14:51 schrieb Daniel Vetter:
On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
Only initialize the DMA coherent pools if they are used.
Signed-off-by: Christian König christian.koenig@amd.com
Ah, just realized the answer to my question on patch 2: The pools are per-device, due to dma_alloc_coherent being per-device (but really mostly it isn't, but that's what we have to deal with fighting the dma-api abstraction).
I think this would make a lot more sense if the shrinkers are per-pool (and also most of the debugfs files), since as-is in a multi-gpu system the first gpu's pool gets preferentially thrashed. Which isn't a nice design. Splitting that into per gpu shrinkers means we get equal shrinking without having to maintain a global lru. This is how xfs seems to set up their shrinkers, and in general xfs people have a solid understanding of this stuff.
Well, fairness and not thrashing the first GPU's pool is the reason why I implemented just one shrinker plus a global LRU.
That's kinda defeating the point of how the core mm works. At least of how I'm understanding how it works. Imo we shouldn't try to re-implement this kind of balancing across different pools in our callback, since core mm tries pretty hard to equally shrink already (it only shrinks each shrinker a little bit each round, but does a lot of rounds under memory pressure).
Correct, see the problem is that we don't want to shrink from each pool on each round.
E.g. we have something like 48 global pools and 36 for each device which needs a DMA coherent pool.
On each round we want to shrink only one cached item from one pool and not 48.
Hm if the pool is that small, then this feels like we're caching at the wrong level, and probably we should cache at the dma-api level. Or well, below that even.
Either way that kind of design stuff should be captured in an overview DOC: kerneldoc imo.
Also the point of shrinkers is that they really should be all sized equally, so if there's very little stuff in them, they shouldn't get shrunk on first round.
Otoh if they're huge, they will be shrunk big time. So aside from fringe effects of rounding slightly different since it's all integers, did you actually measure a benefit here? Or is this more conjecture about how you think shrinkers work or don't work?
Also maintaining your own global lru means global locking for the usual case of none-to-little memory contention, unnecessarily wasting the fast path.
No, the fast path doesn't need to take the global LRU lock.
I've optimized this quite a bit by looking into the pools only once for each power of two.
In other words shrink_slab() just uses list_for_each_entry() on all shrinkers.
In the pool shrinker callback shrink one pool and move it to the end of the shrinker list.
Aside: I think it also would make tons of sense to split up your new ttm bo shrinker up into a per-device lru, and throw the global system memory lru out the window completely :-) Assuming we can indeed get rid of it, and vmwgfx doesn't need it somewhere still.
Yeah, I already have that as a patch set here, but I have this dependent on a larger rename of the device structures.
Hm maybe include that in the next round, just for the bigger picture? Don't have to merge it all in one go, just want to make sure we agree on where we're going.
I need to clean this set up quite a bit. Let's push this one here upstream first.
Hm yeah I guess we need to get somewhere first, but this feels a bit murky. I'll try and review more details for the next round at least. -Daniel
Aside from this lgtm, but I guess will change a bit with that shuffling.
Thanks for the review, going to send out a new version with the fs_reclaim_acquire/release added in a minute.
Cool.
Cheers, Daniel
Got distracted by bug fixes in the last two weeks, but really going to send that out now :)
Christian.
Christian.
-Daniel
drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1cdacd58753a..f09e34614226 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
- } } EXPORT_SYMBOL(ttm_pool_init);
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j;
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j < MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
- } } EXPORT_SYMBOL(ttm_pool_fini);
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i;
- if (!pool->use_dma_alloc) {
seq_puts(m, "unused\n");
return 0;
- }
- ttm_pool_debugfs_header(m); spin_lock(&shrinker_lock);
-- 2.25.1
On Fri, Dec 18, 2020 at 06:55:35PM +0100, Christian König wrote:
As far as I can tell the buffer_count was never used by a userspace application.
The number of BOs in the system is far better suited in debugfs than sysfs and we now should be able to add other information here as well.
v2: add that additionally to sysfs
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch
Not sure where I acked this, but looks reasonable. -Daniel
drivers/gpu/drm/ttm/ttm_bo.c | 48 ++------------------------------ drivers/gpu/drm/ttm/ttm_module.c | 4 +++ drivers/gpu/drm/ttm/ttm_module.h | 6 ++-- 3 files changed, 11 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 31e8b3da5563..cd55e3104e50 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -44,8 +44,6 @@
#include "ttm_module.h"
-static void ttm_bo_global_kobj_release(struct kobject *kobj);
/*
- ttm_global_mutex - protecting the global BO state
*/ @@ -54,11 +52,6 @@ unsigned ttm_bo_glob_use_count; struct ttm_bo_global ttm_bo_glob; EXPORT_SYMBOL(ttm_bo_glob);
-static struct attribute ttm_bo_count = {
- .name = "bo_count",
- .mode = S_IRUGO
-};
/* default destructor */ static void ttm_bo_default_destroy(struct ttm_buffer_object *bo) { @@ -84,32 +77,6 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, } }
-static ssize_t ttm_bo_global_show(struct kobject *kobj,
struct attribute *attr,
char *buffer)
-{
- struct ttm_bo_global *glob =
container_of(kobj, struct ttm_bo_global, kobj);
- return snprintf(buffer, PAGE_SIZE, "%d\n",
atomic_read(&glob->bo_count));
-}
-static struct attribute *ttm_bo_global_attrs[] = {
- &ttm_bo_count,
- NULL
-};
-static const struct sysfs_ops ttm_bo_global_ops = {
- .show = &ttm_bo_global_show
-};
-static struct kobj_type ttm_bo_glob_kobj_type = {
- .release = &ttm_bo_global_kobj_release,
- .sysfs_ops = &ttm_bo_global_ops,
- .default_attrs = ttm_bo_global_attrs
-};
static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; @@ -1226,14 +1193,6 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, } EXPORT_SYMBOL(ttm_bo_dma_acc_size);
-static void ttm_bo_global_kobj_release(struct kobject *kobj) -{
- struct ttm_bo_global *glob =
container_of(kobj, struct ttm_bo_global, kobj);
- __free_page(glob->dummy_read_page);
-}
static void ttm_bo_global_release(void) { struct ttm_bo_global *glob = &ttm_bo_glob; @@ -1245,6 +1204,7 @@ static void ttm_bo_global_release(void) kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob);
- __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob));
out: mutex_unlock(&ttm_global_mutex); @@ -1277,10 +1237,8 @@ static int ttm_bo_global_init(void) INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);
- ret = kobject_init_and_add(
&glob->kobj, &ttm_bo_glob_kobj_type, ttm_get_kobj(), "buffer_objects");
- if (unlikely(ret != 0))
kobject_put(&glob->kobj);
- debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
&glob->bo_count);
out: mutex_unlock(&ttm_global_mutex); return ret; diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index c0906437cb1c..f6566603a60f 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -32,12 +32,14 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/sched.h> +#include <linux/debugfs.h> #include <drm/drm_sysfs.h>
#include "ttm_module.h"
static DECLARE_WAIT_QUEUE_HEAD(exit_q); static atomic_t device_released; +struct dentry *ttm_debugfs_root;
static struct device_type ttm_drm_class_type = { .name = "ttm", @@ -77,6 +79,7 @@ static int __init ttm_init(void) if (unlikely(ret != 0)) goto out_no_dev_reg;
- ttm_debugfs_root = debugfs_create_dir("ttm", NULL); return 0;
out_no_dev_reg: atomic_set(&device_released, 1); @@ -94,6 +97,7 @@ static void __exit ttm_exit(void) */
wait_event(exit_q, atomic_read(&device_released) == 1);
- debugfs_remove(ttm_debugfs_root);
}
module_init(ttm_init); diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h index 45fa318c1585..2f03c2fcf570 100644 --- a/drivers/gpu/drm/ttm/ttm_module.h +++ b/drivers/gpu/drm/ttm/ttm_module.h @@ -31,10 +31,12 @@ #ifndef _TTM_MODULE_H_ #define _TTM_MODULE_H_
-#include <linux/kernel.h> +#define TTM_PFX "[TTM] "
struct kobject; +struct dentry;
-#define TTM_PFX "[TTM] " extern struct kobject *ttm_get_kobj(void); +extern struct dentry *ttm_debugfs_root;
#endif /* _TTM_MODULE_H_ */
2.25.1
dri-devel@lists.freedesktop.org