This patch provides infrastructure for deferring buffer frees.
This is a feature ION provided which, when used with some form of a page pool, provides a nice performance boost in an allocation microbenchmark. The reason it helps is that it allows the page zeroing to be done out of the normal allocation/free path, and pushed off to a kthread.
As not all heaps will find this useful, it is implemented as an optional helper library that heaps can utilize.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/dma-buf/heaps/Kconfig                |   3 +
 drivers/dma-buf/heaps/Makefile               |   1 +
 drivers/dma-buf/heaps/deferred-free-helper.c | 136 +++++++++++++++++++
 drivers/dma-buf/heaps/deferred-free-helper.h |  15 ++
 4 files changed, 155 insertions(+)
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.c
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.h
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig index a5eef06c4226..ecf65204f714 100644 --- a/drivers/dma-buf/heaps/Kconfig +++ b/drivers/dma-buf/heaps/Kconfig @@ -1,3 +1,6 @@ +config DMABUF_HEAPS_DEFERRED_FREE + bool + config DMABUF_HEAPS_SYSTEM bool "DMA-BUF System Heap" depends on DMABUF_HEAPS diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile index 974467791032..4e7839875615 100644 --- a/drivers/dma-buf/heaps/Makefile +++ b/drivers/dma-buf/heaps/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c new file mode 100644 index 000000000000..b8f54860454f --- /dev/null +++ b/drivers/dma-buf/heaps/deferred-free-helper.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Deferred dmabuf freeing helper + * + * Copyright (C) 2020 Linaro, Ltd. + * + * Based on the ION page pool code + * Copyright (C) 2011 Google, Inc. + */ + +#include <linux/freezer.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/sched/signal.h> + +#include "deferred-free-helper.h" + +static LIST_HEAD(free_list); +static size_t list_size; +wait_queue_head_t freelist_waitqueue; +struct task_struct *freelist_task; +static DEFINE_MUTEX(free_list_lock); + +enum { + USE_POOL = 0, + SKIP_POOL = 1, +}; + +void deferred_free(struct deferred_freelist_item *item, + void (*free)(struct deferred_freelist_item*, bool), + size_t size) +{ + INIT_LIST_HEAD(&item->list); + item->size = size; + item->free = free; + + mutex_lock(&free_list_lock); + list_add(&item->list, &free_list); + list_size += size; + mutex_unlock(&free_list_lock); + wake_up(&freelist_waitqueue); +} + +static size_t free_one_item(bool nopool) +{ + size_t size = 0; + struct deferred_freelist_item *item; + + mutex_lock(&free_list_lock); + if (list_empty(&free_list)) { + mutex_unlock(&free_list_lock); + return 0; + } + item = list_first_entry(&free_list, struct deferred_freelist_item, list); + list_del(&item->list); + size = item->size; + list_size -= size; + mutex_unlock(&free_list_lock); + + item->free(item, nopool); + return size; +} + +static unsigned long get_freelist_size(void) +{ + unsigned long size; + + mutex_lock(&free_list_lock); + size = list_size; + mutex_unlock(&free_list_lock); + return size; +} + +static unsigned long freelist_shrink_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + return get_freelist_size(); +} + +static unsigned long freelist_shrink_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + int total_freed = 0; + + if (sc->nr_to_scan == 0) + return 0; + + while (total_freed < sc->nr_to_scan) { + int freed = free_one_item(SKIP_POOL); + + if (!freed) + break; + + total_freed += freed; + } + + return total_freed; +} + +static struct shrinker freelist_shrinker = { + .count_objects = freelist_shrink_count, + .scan_objects = freelist_shrink_scan, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +static int deferred_free_thread(void *data) +{ + while (true) { + wait_event_freezable(freelist_waitqueue, + get_freelist_size() > 0); + + free_one_item(USE_POOL); + } + + return 0; +} + +static int deferred_freelist_init(void) +{ + list_size = 0; + + init_waitqueue_head(&freelist_waitqueue); + freelist_task = kthread_run(deferred_free_thread, NULL, + "%s", 
"dmabuf-deferred-free-worker"); + if (IS_ERR(freelist_task)) { + pr_err("%s: creating thread for deferred free failed\n", + __func__); + return -1; + } + sched_set_normal(freelist_task, 19); + + return register_shrinker(&freelist_shrinker); +} +device_initcall(deferred_freelist_init); diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h new file mode 100644 index 000000000000..09a2274a897c --- /dev/null +++ b/drivers/dma-buf/heaps/deferred-free-helper.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef DEFERRED_FREE_HELPER_H +#define DEFERRED_FREE_HELPER_H + +struct deferred_freelist_item { + size_t size; + void (*free)(struct deferred_freelist_item *i, bool no_pool); + struct list_head list; +}; + +void deferred_free(struct deferred_freelist_item *item, + void (*free)(struct deferred_freelist_item *i, bool no_pool), + size_t size); +#endif
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/dma-buf/heaps/Kconfig       |  1 +
 drivers/dma-buf/heaps/system_heap.c | 68 +++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 4 deletions(-)
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig index ecf65204f714..fa5e1c330cce 100644 --- a/drivers/dma-buf/heaps/Kconfig +++ b/drivers/dma-buf/heaps/Kconfig @@ -4,6 +4,7 @@ config DMABUF_HEAPS_DEFERRED_FREE config DMABUF_HEAPS_SYSTEM bool "DMA-BUF System Heap" depends on DMABUF_HEAPS + select PAGE_POOL help Choose this option to enable the system dmabuf heap. The system heap is backed by pages from the buddy allocator. If in doubt, say Y. diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 17e0e9a68baf..885e30894b77 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -20,6 +20,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <net/page_pool.h>
static struct dma_heap *sys_heap;
@@ -53,6 +54,7 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP}; */ static const unsigned int orders[] = {8, 4, 0}; #define NUM_ORDERS ARRAY_SIZE(orders) +struct page_pool *pools[NUM_ORDERS];
static struct sg_table *dup_sg_table(struct sg_table *table) { @@ -281,18 +283,59 @@ static void system_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) dma_buf_map_clear(map); }
+static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot) +{ + void *addr = vmap(pages, num, VM_MAP, pgprot); + + if (!addr) + return -ENOMEM; + memset(addr, 0, PAGE_SIZE * num); + vunmap(addr); + return 0; +} + +static int system_heap_zero_buffer(struct system_heap_buffer *buffer) +{ + struct sg_table *sgt = &buffer->sg_table; + struct sg_page_iter piter; + struct page *pages[32]; + int p = 0; + int ret = 0; + + for_each_sgtable_page(sgt, &piter, 0) { + pages[p++] = sg_page_iter_page(&piter); + if (p == ARRAY_SIZE(pages)) { + ret = system_heap_clear_pages(pages, p, PAGE_KERNEL); + if (ret) + return ret; + p = 0; + } + } + if (p) + ret = system_heap_clear_pages(pages, p, PAGE_KERNEL); + + return ret; +} + static void system_heap_dma_buf_release(struct dma_buf *dmabuf) { struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table; struct scatterlist *sg; - int i; + int i, j; + + /* Zero the buffer pages before adding back to the pool */ + system_heap_zero_buffer(buffer);
table = &buffer->sg_table; for_each_sg(table->sgl, sg, table->nents, i) { struct page *page = sg_page(sg);
- __free_pages(page, compound_order(page)); + for (j = 0; j < NUM_ORDERS; j++) { + if (compound_order(page) == orders[j]) + break; + } + page_pool_put_full_page(pools[j], page, false); } sg_free_table(table); kfree(buffer); @@ -322,8 +365,7 @@ static struct page *alloc_largest_available(unsigned long size, continue; if (max_order < orders[i]) continue; - - page = alloc_pages(order_flags[i], orders[i]); + page = page_pool_alloc_pages(pools[i], order_flags[i]); if (!page) continue; return page; @@ -428,6 +470,24 @@ static const struct dma_heap_ops system_heap_ops = { static int system_heap_create(void) { struct dma_heap_export_info exp_info; + int i; + + for (i = 0; i < NUM_ORDERS; i++) { + struct page_pool_params pp; + + memset(&pp, 0, sizeof(pp)); + pp.order = orders[i]; + pools[i] = page_pool_create(&pp); + + if (IS_ERR(pools[i])) { + int j; + + pr_err("%s: page pool creation failed!\n", __func__); + for (j = 0; j < i; j++) + page_pool_destroy(pools[j]); + return PTR_ERR(pools[i]); + } + }
exp_info.name = "system"; exp_info.ops = &system_heap_ops;
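As background for the discussion that follows, a condensed sketch of the page_pool lifecycle this patch leans on (error handling trimmed; the example_* names are illustrative):

#include <net/page_pool.h>

static struct page_pool *example_pool;

static int example_pool_init(void)
{
	/* One pool per page order; order 4 mirrors the middle entry
	 * of the heap's orders[] array. */
	struct page_pool_params pp = { .order = 4 };

	example_pool = page_pool_create(&pp);
	return IS_ERR(example_pool) ? PTR_ERR(example_pool) : 0;
}

static void example_pool_cycle(void)
{
	struct page *page = page_pool_alloc_pages(example_pool, GFP_KERNEL);

	/* Returning the page refills the pool rather than going back to
	 * the buddy allocator; false = not called from a NAPI context. */
	if (page)
		page_pool_put_full_page(example_pool, page, false);
}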
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-) -Daniel
 drivers/dma-buf/heaps/Kconfig       |  1 +
 drivers/dma-buf/heaps/system_heap.c | 68 +++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 4 deletions(-)
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig index ecf65204f714..fa5e1c330cce 100644 --- a/drivers/dma-buf/heaps/Kconfig +++ b/drivers/dma-buf/heaps/Kconfig @@ -4,6 +4,7 @@ config DMABUF_HEAPS_DEFERRED_FREE config DMABUF_HEAPS_SYSTEM bool "DMA-BUF System Heap" depends on DMABUF_HEAPS
+	select PAGE_POOL
 	help
 	  Choose this option to enable the system dmabuf heap. The system heap
 	  is backed by pages from the buddy allocator. If in doubt, say Y.
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 17e0e9a68baf..885e30894b77 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -20,6 +20,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <net/page_pool.h>
static struct dma_heap *sys_heap;
@@ -53,6 +54,7 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP}; */ static const unsigned int orders[] = {8, 4, 0}; #define NUM_ORDERS ARRAY_SIZE(orders) +struct page_pool *pools[NUM_ORDERS];
static struct sg_table *dup_sg_table(struct sg_table *table) { @@ -281,18 +283,59 @@ static void system_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) dma_buf_map_clear(map); }
+static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
+{
+	void *addr = vmap(pages, num, VM_MAP, pgprot);
+
+	if (!addr)
+		return -ENOMEM;
+	memset(addr, 0, PAGE_SIZE * num);
+	vunmap(addr);
+	return 0;
+}
+
+static int system_heap_zero_buffer(struct system_heap_buffer *buffer)
+{
+	struct sg_table *sgt = &buffer->sg_table;
+	struct sg_page_iter piter;
+	struct page *pages[32];
+	int p = 0;
+	int ret = 0;
+
+	for_each_sgtable_page(sgt, &piter, 0) {
+		pages[p++] = sg_page_iter_page(&piter);
+		if (p == ARRAY_SIZE(pages)) {
+			ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+			if (ret)
+				return ret;
+			p = 0;
+		}
+	}
+	if (p)
+		ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+
+	return ret;
+}
+
 static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 {
 	struct system_heap_buffer *buffer = dmabuf->priv;
 	struct sg_table *table;
 	struct scatterlist *sg;
-	int i;
+	int i, j;
+
+	/* Zero the buffer pages before adding back to the pool */
+	system_heap_zero_buffer(buffer);

 	table = &buffer->sg_table;
 	for_each_sg(table->sgl, sg, table->nents, i) {
 		struct page *page = sg_page(sg);

-		__free_pages(page, compound_order(page));
+		for (j = 0; j < NUM_ORDERS; j++) {
+			if (compound_order(page) == orders[j])
+				break;
+		}
+		page_pool_put_full_page(pools[j], page, false);
 	}
 	sg_free_table(table);
 	kfree(buffer);
@@ -322,8 +365,7 @@ static struct page *alloc_largest_available(unsigned long size,
 			continue;
 		if (max_order < orders[i])
 			continue;
-
-		page = alloc_pages(order_flags[i], orders[i]);
+		page = page_pool_alloc_pages(pools[i], order_flags[i]);
 		if (!page)
 			continue;
 		return page;
@@ -428,6 +470,24 @@ static const struct dma_heap_ops system_heap_ops = {
 static int system_heap_create(void)
 {
 	struct dma_heap_export_info exp_info;
+	int i;
+
+	for (i = 0; i < NUM_ORDERS; i++) {
+		struct page_pool_params pp;
+
+		memset(&pp, 0, sizeof(pp));
+		pp.order = orders[i];
+		pools[i] = page_pool_create(&pp);
+
+		if (IS_ERR(pools[i])) {
+			int j;
+
+			pr_err("%s: page pool creation failed!\n", __func__);
+			for (j = 0; j < i; j++)
+				page_pool_destroy(pools[j]);
+			return PTR_ERR(pools[i]);
+		}
+	}
exp_info.name = "system"; exp_info.ops = &system_heap_ops;
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
thanks -john
On Fri, Dec 18, 2020 at 05:16:56PM -0800, John Stultz wrote:
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
Oops, I didn't look that closely and didn't realize you're reusing the one from net/core/.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
I think moving the page_pool from net into lib and using it in ttm might also be an option. Lack of a shrinker in the networking one might be a bit of a problem.
Also I guess the real fun will start if you pre-flush pages (after zeroing) or use write-combined mode (which tends to be a real pain to allocate). So for those I think we likely need more than just reusing the one net/core has already. -Daniel
On Mon, Dec 21, 2020 at 2:09 PM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Dec 18, 2020 at 05:16:56PM -0800, John Stultz wrote:
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
Oops, I didn't look that closely and didn't realize you're reusing the one from net/core/.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
I think moving the page_pool from net into lib and using it in ttm might also be an option. Lack of a shrinker in the networking one might be a bit of a problem.
Yea. I've been looking at this, to see how to abstract out a generic pool implementation, but each pool implementation has been tweaked for the specific use cases, so a general abstraction is a bit tough right off.
For example, the ttm pool handles allocations from both alloc_pages and dma_alloc in a pool, whereas the net page pool only uses alloc_pages (but can pre-map via dma_map_attr).
And as you mentioned, the networking page pool is statically sized where the ttm pool is dynamic and shrinker controlled.
Further, as the ttm pool is utilized for keeping pools of pages set for specific cache types, it makes it difficult to abstract that out as we have to be able to reset the caching (set_pages_wb()) when shrinking, so that would also have to be pushed down into the pool attributes as well.
So far, in my attempts to share an abstraction for both the net page_pool and the ttm page pool, it seems to make the code complexity worse on both sides - so while I'm interested in continuing to try to find a way to share code here, I'm not sure it makes sense to hold up this series (which is already re-using an existing implementation and provides a performance bump in microbenchmarks) for the grand-unified-page-pool. Efforts to refactor the ttm pool and net page pool can continue on independently, and I'd be happy to move the system heap to whatever that ends up being.
thanks -john
On Fri, Jan 22, 2021 at 05:28:32PM -0800, John Stultz wrote:
On Mon, Dec 21, 2020 at 2:09 PM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Dec 18, 2020 at 05:16:56PM -0800, John Stultz wrote:
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
Oops, I didn't look that closely and didn't realize you're reusing the one from net/core/.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
I think moving the page_pool from net into lib and using it in ttm might also be an option. Lack of a shrinker in the networking one might be a bit of a problem.
Yea. I've been looking at this, to see how to abstract out a generic pool implementation, but each pool implementation has been tweaked for the specific use cases, so a general abstraction is a bit tough right off.
For example, the ttm pool handles allocations from both alloc_pages and dma_alloc in a pool, whereas the net page pool only uses alloc_pages (but can pre-map via dma_map_attr).
And as you mentioned, the networking page pool is statically sized where the ttm pool is dynamic and shrinker controlled.
Further, as the ttm pool is utilized for keeping pools of pages set for specific cache types, it makes it difficult to abstract that out as we have to be able to reset the caching (set_pages_wb()) when shrinking, so that would also have to be pushed down into the pool attributes as well.
So far, in my attempts to share an abstraction for both the net page_pool and the ttm page pool, it seems to make the code complexity worse on both sides - so while I'm interested in continuing to try to find a way to share code here, I'm not sure it makes sense to hold up this series (which is already re-using an existing implementation and provides a performance bump in microbenchmarks) for the grand-unified-page-pool. Efforts to refactor the ttm pool and net page pool can continue on independently, and I'd be happy to move the system heap to whatever that ends up being.
The thing is, I'm not sure sharing code with net/core is a really good idea; at least it seems like we have some impedance mismatch with the ttm pool. And going forward I expect sooner or later we need alignment between the pools/caches under drm with dma-buf heap pools a lot more than between dma-buf and net/core.
So this feels like a bit like code sharing for code sharing's sake and not where it makes sense. Expecting net/core and gpu stacks to have the exact same needs for a page pool allocator has a good chance of biting us in the long run. -Daniel
thanks -john
On Tue, Feb 2, 2021 at 6:04 AM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Jan 22, 2021 at 05:28:32PM -0800, John Stultz wrote:
On Mon, Dec 21, 2020 at 2:09 PM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Dec 18, 2020 at 05:16:56PM -0800, John Stultz wrote:
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
Reuse/abuse the pagepool code from the network code to speed up allocation performance.
This is similar to the ION pagepool usage, but tries to utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
Oops, I didn't look that closely and didn't realize you're reusing the one from net/core/.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
I think moving the page_pool from net into lib and using it in ttm might also be an option. Lack of a shrinker in the networking one might be a bit of a problem.
Yea. I've been looking at this, to see how to abstract out a generic pool implementation, but each pool implementation has been tweaked for the specific use cases, so a general abstraction is a bit tough right off.
For example, the ttm pool handles allocations from both alloc_pages and dma_alloc in a pool, whereas the net page pool only uses alloc_pages (but can pre-map via dma_map_attr).
And as you mentioned, the networking page pool is statically sized where the ttm pool is dynamic and shrinker controlled.
Further, as the ttm pool is utilized for keeping pools of pages set for specific cache types, it makes it difficult to abstract that out as we have to be able to reset the caching (set_pages_wb()) when shrinking, so that would also have to be pushed down into the pool attributes as well.
So far, in my attempts to share an abstraction for both the net page_pool and the ttm page pool, it seems to make the code complexity worse on both sides - so while I'm interested in continuing to try to find a way to share code here, I'm not sure it makes sense to hold up this series (which is already re-using an existing implementation and provides a performance bump in microbenchmarks) for the grand-unified-page-pool. Efforts to refactor the ttm pool and net page pool can continue on independently, and I'd be happy to move the system heap to whatever that ends up being.
The thing is, I'm not sure sharing code with net/core is a really good idea; at least it seems like we have some impedance mismatch with the ttm pool. And going forward I expect sooner or later we need alignment between the pools/caches under drm with dma-buf heap pools a lot more than between dma-buf and net/core.
I mean... I don't think you're wrong here, but it was your suggestion.
So this feels like a bit like code sharing for code sharing's sake and not where it makes sense. Expecting net/core and gpu stacks to have the exact same needs for a page pool allocator has a good chance of biting us in the long run.
Again, I agree with you at the high level here (dmabuf system heap and ttm page pooling are conceptually more likely to align, and duplication of buffer pools is non-optimal), but there's still the practical aspect of the ttm pool being pretty tied to the ttm code (utilizing ttm contexts, fixed MAX_ORDER*TTM_NUM_CACHING_TYPES subpools per pool + 4 global sub-pools for only x86).
So... I guess I'll go for another pass at trying to pull something generic out of the ttm_pool, but the cynic in me suspects folks will just object to any inefficiencies added in order to do so (the code-sharing for its own sake argument above) and I'll be back to where I am now. But we'll see.
thanks -john
On Tue, Feb 02, 2021 at 09:56:14PM -0800, John Stultz wrote:
On Tue, Feb 2, 2021 at 6:04 AM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Jan 22, 2021 at 05:28:32PM -0800, John Stultz wrote:
On Mon, Dec 21, 2020 at 2:09 PM Daniel Vetter daniel@ffwll.ch wrote:
On Fri, Dec 18, 2020 at 05:16:56PM -0800, John Stultz wrote:
On Fri, Dec 18, 2020 at 6:36 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Dec 17, 2020 at 11:06:11PM +0000, John Stultz wrote:
> Reuse/abuse the pagepool code from the network code to speed
> up allocation performance.
>
> This is similar to the ION pagepool usage, but tries to
> utilize generic code instead of a custom implementation.
We also have one of these in ttm. I think we should have at most one of these for the gpu ecosystem overall, maybe as a helper that can be plugged into all the places.
Or I'm kinda missing something, which could be since I only glanced at yours for a bit. But it's also called page pool for buffer allocations, and I don't think there's that many ways to implement that really :-)
Yea, when I was looking around the ttm one didn't seem quite as generic as the networking one, which more easily fit in here.
Oops, I didn't look that closely and didn't realize you're reusing the one from net/core/.
The main benefit for the system heap is not so much the pool itself (the normal page allocator is pretty good), as being able to defer the free and zero the pages in a background thread, so the pool is effectively filled with pre-zeroed pages.
But I'll take another look at the ttm implementation and see if it can be re-used or the shared code refactored and pulled out somehow.
I think moving the page_pool from net into lib and using it in ttm might also be an option. Lack of a shrinker in the networking one might be a bit of a problem.
Yea. I've been looking at this, to see how to abstract out a generic pool implementation, but each pool implementation has been tweaked for the specific use cases, so a general abstraction is a bit tough right off.
For example, the ttm pool handles allocations from both alloc_pages and dma_alloc in a pool, whereas the net page pool only uses alloc_pages (but can pre-map via dma_map_attr).
And as you mentioned, the networking page pool is statically sized where the ttm pool is dynamic and shrinker controlled.
Further, as the ttm pool is utilized for keeping pools of pages set for specific cache types, it makes it difficult to abstract that out as we have to be able to reset the caching (set_pages_wb()) when shrinking, so that would also have to be pushed down into the pool attributes as well.
So far, in my attempts to share an abstraction for both the net page_pool and the ttm page pool, it seems to make the code complexity worse on both sides - so while I'm interested in continuing to try to find a way to share code here, I'm not sure it makes sense to hold up this series (which is already re-using an existing implementation and provides a performance bump in microbenchmarks) for the grand-unified-page-pool. Efforts to refactor the ttm pool and net page pool can continue on independently, and I'd be happy to move the system heap to whatever that ends up being.
The thing is, I'm not sure sharing code with net/core is a really good idea; at least it seems like we have some impedance mismatch with the ttm pool. And going forward I expect sooner or later we need alignment between the pools/caches under drm with dma-buf heap pools a lot more than between dma-buf and net/core.
I mean... I don't think you're wrong here, but it was your suggestion.
So this feels like a bit like code sharing for code sharing's sake and not where it makes sense. Expecting net/core and gpu stacks to have the exact same needs for a page pool allocator has a good chance of biting us in the long run.
Again, I agree with you at the high level here (dmabuf system heap and ttm page pooling are conceptually more likely to align, and duplication of buffer pools is non-optimal), but there's still the practical aspect of the ttm pool being pretty tied to the ttm code (utilizing ttm contexts, fixed MAX_ORDER*TTM_NUM_CACHING_TYPES subpools per pool + 4 global sub-pools for only x86).
So... I guess I'll go for another pass at trying to pull something generic out of the ttm_pool, but the cynic in me suspects folks will just object to any inefficiencies added in order to do so (the code-sharing for its own sake argument above) and I'll be back to where I am now. But we'll see.
Yeah realistically we're not there yet most likely. It's also a bigger problem, with shrinkers all over various drivers and buffer locking schemes mostly of the yolo kind.
With Android I'm just more worried than with the other parts since in reality the actual production gpu stacks on android are all out of tree. And so there's substantially more inertia against refactoring (in practice at least, because the only people who care are not super willing to create tons of work in their out-of-tree stacks). And given past progress, waiting for Android to arrive upstream is also not a great option really - outside of some nice tech demos that in practice don't ship anywhere.
And without the full stack a lot of this just looks like tech debt offloading without a whole lot of benefits to upstream.
But also, this isn't a new topic :-)
Cheers, Daniel
Utilize the deferred free helper library in the system heap.
This provides a nice performance bump and puts the system heap performance on par with ION.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/dma-buf/heaps/Kconfig       |  1 +
 drivers/dma-buf/heaps/system_heap.c | 30 ++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig index fa5e1c330cce..3c1cdecca9e2 100644 --- a/drivers/dma-buf/heaps/Kconfig +++ b/drivers/dma-buf/heaps/Kconfig @@ -5,6 +5,7 @@ config DMABUF_HEAPS_SYSTEM bool "DMA-BUF System Heap" depends on DMABUF_HEAPS select PAGE_POOL + select DMABUF_HEAPS_DEFERRED_FREE help Choose this option to enable the system dmabuf heap. The system heap is backed by pages from the buddy allocator. If in doubt, say Y. diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 885e30894b77..905b304ea24b 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -22,6 +22,8 @@ #include <linux/vmalloc.h> #include <net/page_pool.h>
+#include "deferred-free-helper.h" + static struct dma_heap *sys_heap;
struct system_heap_buffer { @@ -32,6 +34,7 @@ struct system_heap_buffer { struct sg_table sg_table; int vmap_cnt; void *vaddr; + struct deferred_freelist_item deferred_free; };
struct dma_heap_attachment { @@ -317,30 +320,43 @@ static int system_heap_zero_buffer(struct system_heap_buffer *buffer) return ret; }
-static void system_heap_dma_buf_release(struct dma_buf *dmabuf) +static void system_heap_buf_free(struct deferred_freelist_item *item, bool skip_pool) { - struct system_heap_buffer *buffer = dmabuf->priv; + struct system_heap_buffer *buffer = container_of(item, struct system_heap_buffer, deferred_free); struct sg_table *table; struct scatterlist *sg; int i, j;
/* Zero the buffer pages before adding back to the pool */ - system_heap_zero_buffer(buffer); + if (!skip_pool) + if (system_heap_zero_buffer(buffer)) + skip_pool = true; // On zeroing failure, just free
table = &buffer->sg_table; for_each_sg(table->sgl, sg, table->nents, i) { struct page *page = sg_page(sg);
- for (j = 0; j < NUM_ORDERS; j++) { - if (compound_order(page) == orders[j]) - break; + if (skip_pool) { + __free_pages(page, compound_order(page)); + } else { + for (j = 0; j < NUM_ORDERS; j++) { + if (compound_order(page) == orders[j]) + break; + } + page_pool_put_full_page(pools[j], page, false); } - page_pool_put_full_page(pools[j], page, false); } sg_free_table(table); kfree(buffer); }
+static void system_heap_dma_buf_release(struct dma_buf *dmabuf) +{ + struct system_heap_buffer *buffer = dmabuf->priv; + + deferred_free(&buffer->deferred_free, system_heap_buf_free, buffer->len); +} + static const struct dma_buf_ops system_heap_buf_ops = { .attach = system_heap_attach, .detach = system_heap_detach,
Greeting,
FYI, we noticed the following commit (built with gcc-9):
commit: adc430f226136c02a382fe6dff29a6382059ce0c ("dma-buf: system_heap: Add deferred freeing to the system heap") url: https://github.com/0day-ci/linux/commits/John-Stultz/dma-buf-heaps-Add-defer...
in testcase: locktorture
version:
with following parameters:

	runtime: 300s
	test: cpuhotplug
test-description: This torture test consists of creating a number of kernel threads which acquire the lock and hold it for specific amount of time, thus simulating different critical region behaviors.
test-url: https://www.kernel.org/doc/Documentation/locking/locktorture.txt
on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
+-------------------------------------------------------+------------+------------+
|                                                       | 51e229fb2d | adc430f226 |
+-------------------------------------------------------+------------+------------+
| boot_successes                                        | 19         | 0          |
| boot_failures                                         | 9          | 4          |
| invoked_oom-killer:gfp_mask=0x                        | 1          |            |
| Mem-Info                                              | 1          |            |
| EIP:__put_user_nocheck_4                              | 1          |            |
| kernel_BUG_at_kernel/sched/core.c                     | 8          | 4          |
| invalid_opcode:#[##]                                  | 8          | 4          |
| EIP:sched_cpu_dying                                   | 8          | 4          |
| Kernel_panic-not_syncing:Fatal_exception              | 8          | 4          |
| WARNING:possible_circular_locking_dependency_detected | 8          | 4          |
| WARNING:at_kernel/sched/core.c:#__might_sleep         | 0          | 4          |
| EIP:__might_sleep                                     | 0          | 4          |
+-------------------------------------------------------+------------+------------+
If you fix the issue, kindly add following tag
Reported-by: kernel test robot <oliver.sang@intel.com>
[  107.814818] WARNING: CPU: 0 PID: 130 at kernel/sched/core.c:7858 __might_sleep+0x6c/0x80
[  107.816258] mtdoops: mtd device (mtddev=name/number) must be supplied
[  107.818504] Modules linked in:
[  107.818520] CPU: 0 PID: 130 Comm: dmabuf-deferred Not tainted 5.10.0-next-20201217-00003-gadc430f22613 #1
[  107.818536] EIP: __might_sleep+0x6c/0x80
[  107.821708] L440GX flash mapping: failed to find PIIX4 ISA bridge, cannot continue
[  107.823070] Code: 32 db 01 8b 73 0c 8b 83 e0 17 00 00 89 44 24 0c 8b 9b e0 17 00 00 89 5c 24 08 89 74 24 04 c7 04 24 38 92 cb da e8 c9 a5 b5 00 <0f> 0b 8b 4d ec 8b 55 f0 8b 45 f4 eb b0 8d b4 26 00 00 00 00 55 89
[  107.827562] SBC-GXx flash: IO:0x258-0x259 MEM:0xdc000-0xdffff
[  107.828850] EAX: 00000069 EBX: d96a0ed1 ECX: 00000000 EDX: 00000000
[  107.836825] scx200_docflash: NatSemi SCx200 DOCCS Flash Driver
[  107.839849] ESI: 00000001 EDI: c11e4200 EBP: c11b3eec ESP: c11b3ec8
[  107.839857] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010296
[  107.839867] CR0: 80050033 CR2: 00000000 CR3: 1b540000 CR4: 00040690
[  107.839889] Call Trace:
[  107.842656] slram: not enough parameters.
[  107.844946]  ? prepare_to_wait_event+0x51/0x140
[  107.855521] mtd mtd0: GPIO lookup for consumer wp
[  107.855658]  ? prepare_to_wait_event+0x51/0x140
[  107.856828] mtd mtd0: using lookup tables for GPIO lookup
[  107.856842] mtd mtd0: No GPIO consumer wp found
[  107.858676]  __mutex_lock+0x21/0x820
[  107.858693]  ? lockdep_hardirqs_on_prepare+0xda/0x180
[  107.858706]  ? _raw_spin_unlock_irqrestore+0x35/0x40
[  107.874413]  ? prepare_to_wait_event+0x7d/0x140
[  107.876397]  ? trace_hardirqs_on+0x22/0xe0
[  107.878110]  ? prepare_to_wait_event+0x7d/0x140
[  107.880095]  mutex_lock_nested+0x20/0x40
[  107.881791]  ? deferred_free_thread+0x75/0xc0
[  107.883801]  deferred_free_thread+0x75/0xc0
[  107.885686]  ? do_wait_intr_irq+0xa0/0xa0
[  107.887498]  kthread+0x107/0x120
[  107.889019]  ? freelist_shrink_count+0x40/0x40
[  107.890980]  ? kthread_create_worker_on_cpu+0x20/0x20
[  107.893128]  ret_from_fork+0x19/0x24
[  107.894780] irq event stamp: 339
[  107.896256] hardirqs last enabled at (349): [<d96bfb75>] console_unlock+0x3f5/0x5a0
[  107.899642] hardirqs last disabled at (358): [<d96bfb66>] console_unlock+0x3e6/0x5a0
[  107.903036] softirqs last enabled at (336): [<da2d79d5>] __do_softirq+0x295/0x435
[  107.906066] softirqs last disabled at (331): [<d96232bd>] do_softirq_own_stack+0x1d/0x40
[  107.908613] random: get_random_bytes called from init_oops_id+0x37/0x40 with crng_init=0
[  107.908624] ---[ end trace 4d9142d8a53ac308 ]---
To reproduce:
# build kernel
cd linux
cp config-5.10.0-next-20201217-00003-gadc430f22613 .config
make HOSTCC=gcc-9 CC=gcc-9 ARCH=i386 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=gcc-9 CC=gcc-9 ARCH=i386 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script  # job-script is attached in this email
Thanks, Oliver Sang
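Reading the trace above: deferred_free_thread() evaluates get_freelist_size() as the condition of wait_event_freezable(), i.e. while the task is already in TASK_INTERRUPTIBLE, and get_freelist_size() takes a mutex and may sleep - hence the __might_sleep splat. A sketch of one plausible fix (my inference from the trace, not something proposed in the report) is to guard the freelist with a spinlock so the wait condition never sleeps:

-static DEFINE_MUTEX(free_list_lock);
+static DEFINE_SPINLOCK(free_list_lock);

 static unsigned long get_freelist_size(void)
 {
 	unsigned long size;
+	unsigned long flags;

-	mutex_lock(&free_list_lock);
+	/* Never sleeps, so safe inside a wait_event() condition. */
+	spin_lock_irqsave(&free_list_lock, flags);
 	size = list_size;
-	mutex_unlock(&free_list_lock);
+	spin_unlock_irqrestore(&free_list_lock, flags);
 	return size;
 }

(deferred_free() and free_one_item() would need the same conversion.)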
Hi John,

Just a couple nits, otherwise looks sane to me.
On Thu, Dec 17, 2020 at 3:06 PM John Stultz john.stultz@linaro.org wrote:
This patch provides infrastructure for deferring buffer frees.
This is a feature ION provided which, when used with some form of a page pool, provides a nice performance boost in an allocation microbenchmark. The reason it helps is that it allows the page zeroing to be done out of the normal allocation/free path, and pushed off to a kthread.
I suggest adding some more description for this API and how it can be used. IIUC there are 2 uses: lazy deferred freeing using a kthread, and object pooling. The no_pool parameter I think deserves some explanation (it disallows pooling when the system is under memory pressure).
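Something along these lines, perhaps (wording is mine, purely to illustrate the suggestion):

/**
 * deferred_free - queue an object to be freed asynchronously
 * @item: freelist item embedded in the object being freed
 * @free: callback that performs the actual free; its bool argument
 *        (no_pool) is true when called from the shrinker, meaning the
 *        callback should bypass any page pool and release memory
 *        directly back to the system
 * @size: size to account against the freelist total
 *
 * Adds the object to a global freelist. A low-priority kthread later
 * zeroes and frees queued objects outside the hot allocation/free
 * path; under memory pressure the shrinker drains the list instead.
 */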
As not all heaps will find this useful, it is implemented as an optional helper library that heaps can utilize.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
 drivers/dma-buf/heaps/Kconfig                |   3 +
 drivers/dma-buf/heaps/Makefile               |   1 +
 drivers/dma-buf/heaps/deferred-free-helper.c | 136 +++++++++++++++++++
 drivers/dma-buf/heaps/deferred-free-helper.h |  15 ++
 4 files changed, 155 insertions(+)
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.c
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.h
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c4226..ecf65204f714 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -1,3 +1,6 @@
+config DMABUF_HEAPS_DEFERRED_FREE
+	bool
+
 config DMABUF_HEAPS_SYSTEM
 	bool "DMA-BUF System Heap"
 	depends on DMABUF_HEAPS
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032..4e7839875615 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c
new file mode 100644
index 000000000000..b8f54860454f
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Deferred dmabuf freeing helper
+ *
+ * Copyright (C) 2020 Linaro, Ltd.
+ *
+ * Based on the ION page pool code
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/freezer.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/sched/signal.h>
+
+#include "deferred-free-helper.h"
+
+static LIST_HEAD(free_list);
+static size_t list_size;
+wait_queue_head_t freelist_waitqueue;
+struct task_struct *freelist_task;
+static DEFINE_MUTEX(free_list_lock);
+
+enum {
+	USE_POOL = 0,
+	SKIP_POOL = 1,
+};
This enum is used for a bool parameter. Either make it part of the public API or eliminate it and use a bool instead.
+void deferred_free(struct deferred_freelist_item *item,
+		   void (*free)(struct deferred_freelist_item*, bool),
+		   size_t size)
+{
+	INIT_LIST_HEAD(&item->list);
+	item->size = size;
+	item->free = free;
+
+	mutex_lock(&free_list_lock);
+	list_add(&item->list, &free_list);
+	list_size += size;
+	mutex_unlock(&free_list_lock);
+	wake_up(&freelist_waitqueue);
+}
+
+static size_t free_one_item(bool nopool)
+{
+	size_t size = 0;
+	struct deferred_freelist_item *item;
+
+	mutex_lock(&free_list_lock);
+	if (list_empty(&free_list)) {
+		mutex_unlock(&free_list_lock);
+		return 0;
+	}
+	item = list_first_entry(&free_list, struct deferred_freelist_item, list);
+	list_del(&item->list);
+	size = item->size;
+	list_size -= size;
+	mutex_unlock(&free_list_lock);
+
+	item->free(item, nopool);
+	return size;
+}
+
+static unsigned long get_freelist_size(void)
+{
+	unsigned long size;
+
+	mutex_lock(&free_list_lock);
+	size = list_size;
+	mutex_unlock(&free_list_lock);
+	return size;
+}
+
+static unsigned long freelist_shrink_count(struct shrinker *shrinker,
+					   struct shrink_control *sc)
+{
+	return get_freelist_size();
+}
+
+static unsigned long freelist_shrink_scan(struct shrinker *shrinker,
+					  struct shrink_control *sc)
+{
+	int total_freed = 0;
+
+	if (sc->nr_to_scan == 0)
+		return 0;
+
+	while (total_freed < sc->nr_to_scan) {
+		int freed = free_one_item(SKIP_POOL);
+
+		if (!freed)
+			break;
+
+		total_freed += freed;
+	}
+
+	return total_freed;
+}
+
+static struct shrinker freelist_shrinker = {
+	.count_objects = freelist_shrink_count,
+	.scan_objects = freelist_shrink_scan,
+	.seeks = DEFAULT_SEEKS,
+	.batch = 0,
+};
+
+static int deferred_free_thread(void *data)
+{
+	while (true) {
+		wait_event_freezable(freelist_waitqueue,
+				     get_freelist_size() > 0);
+
+		free_one_item(USE_POOL);
+	}
+
+	return 0;
+}
+
+static int deferred_freelist_init(void)
+{
+	list_size = 0;
+
+	init_waitqueue_head(&freelist_waitqueue);
+	freelist_task = kthread_run(deferred_free_thread, NULL,
+				    "%s", "dmabuf-deferred-free-worker");
+	if (IS_ERR(freelist_task)) {
+		pr_err("%s: creating thread for deferred free failed\n",
+		       __func__);
+		return -1;
+	}
+	sched_set_normal(freelist_task, 19);
+
+	return register_shrinker(&freelist_shrinker);
+}
+device_initcall(deferred_freelist_init);
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h
new file mode 100644
index 000000000000..09a2274a897c
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef DEFERRED_FREE_HELPER_H
+#define DEFERRED_FREE_HELPER_H
+
+struct deferred_freelist_item {
+	size_t size;
+	void (*free)(struct deferred_freelist_item *i, bool no_pool);
+	struct list_head list;
+};
I assume deferred_freelist_item gets embedded in a structure that is actually being freed?
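(It does - the follow-on system heap patch embeds the item in struct system_heap_buffer and recovers the buffer in the free callback, roughly:)

static void system_heap_buf_free(struct deferred_freelist_item *item, bool skip_pool)
{
	struct system_heap_buffer *buffer =
		container_of(item, struct system_heap_buffer, deferred_free);
	/* ... */
}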
+void deferred_free(struct deferred_freelist_item *item,
+		   void (*free)(struct deferred_freelist_item *i, bool no_pool),
+		   size_t size);
+#endif