Userspace can severely fragment the rb_hole_addr rbtree by manipulating alignment while allocating buffers. A fragmented rb_hole_addr rbtree results in large delays when allocating a buffer object for a userspace application. It takes a long time to find a suitable hole because, if the first attempt fails, we look for neighbouring nodes using rb_prev(). Traversing the rbtree with rb_prev() can take a very long time if the tree is fragmented.
This patch improves searches in a fragmented rb_hole_addr rbtree by storing an extra field, max_hole_size, in drm_mm_node. Each drm_mm_node now stores the maximum hole size of its subtree in drm_mm_node->max_hole_size. Using drm_mm_node->max_hole_size, it is possible to skip a complete subtree if that subtree cannot serve the request, reducing the number of rb_prev() calls.
Note: Implementation details of the rb_hole_addr rbtree updates after node removal and addition are borrowed from block/bfq-wf2q.c, which tries to achieve a similar goal.
With this patch applied, 1 million bo allocations on amdgpu took ~8 sec, whereas without the patch the test code took 28 sec for only 50k bo allocations.
partial test code:

int test_fragmentation(void)
{
#define MAX_ALLOC 1000000
	int i = 0;
	uint32_t minor_version;
	uint32_t major_version;
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle vram_handle[MAX_ALLOC] = {};
	amdgpu_device_handle device_handle;

	request.alloc_size = 4096;
	request.phys_alignment = 8192;
	request.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;

	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);

	amdgpu_device_initialize(fd, &major_version, &minor_version,
				 &device_handle);

	for (i = 0; i < MAX_ALLOC; i++) {
		amdgpu_bo_alloc(device_handle, &request, &vram_handle[i]);
	}

	for (i = 0; i < MAX_ALLOC; i++)
		amdgpu_bo_free(vram_handle[i]);

	return 0;
}
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/drm_mm.c | 140 +++++++++++++++++++++++++++++++++++++--
 include/drm/drm_mm.h     |   1 +
 2 files changed, 137 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8981abe8b7c9..5076d3761f51 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,6 +212,84 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 			   &drm_mm_interval_tree_augment);
 }
 
+static void update_addr_hole_node_max(struct drm_mm_node *entity,
+				      struct rb_node *node)
+{
+	struct drm_mm_node *child;
+
+	if (node) {
+		child = rb_entry(node, struct drm_mm_node, rb_hole_addr);
+		if (child->max_hole_size > entity->max_hole_size)
+			entity->max_hole_size = child->max_hole_size;
+	}
+}
+
+static void update_addr_hole_node(struct rb_node *node)
+{
+	struct drm_mm_node *entity = rb_entry(node, struct drm_mm_node,
+					      rb_hole_addr);
+
+	entity->max_hole_size = entity->hole_size;
+	update_addr_hole_node_max(entity, node->rb_right);
+	update_addr_hole_node_max(entity, node->rb_left);
+}
+
+/**
+ * update_addr_hole_rbtree - update max_hole_size field for the whole tree.
+ * @node: the starting node.
+ *
+ * This function updates max_hole_size field of the @node and all the nodes
+ * that are in the path to the root.
+ */
+static void update_addr_hole_rbtree(struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	while (node) {
+		update_addr_hole_node(node);
+
+		parent = rb_parent(node);
+		if (!parent)
+			return;
+
+		if (node == parent->rb_left && parent->rb_right)
+			update_addr_hole_node(parent->rb_right);
+		else if (parent->rb_left)
+			update_addr_hole_node(parent->rb_left);
+		node = parent;
+	}
+}
+
+/**
+ * find_deepest - find the deepest node that an extraction can modify.
+ * @node: the node being removed.
+ *
+ * This function returns the deepest node that can be moved by rbtree's
+ * rebalance operation after removal of @node. If @node is the last node
+ * in the tree then return NULL.
+ */
+static struct rb_node *find_deepest(struct rb_node *node)
+{
+	struct rb_node *deepest = NULL;
+
+	if (!node->rb_left && !node->rb_right)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+
+	}
+
+	return deepest;
+}
+
 #define RB_INSERT(root, member, expr) do { \
 	struct rb_node **link = &root.rb_node, *rb = NULL; \
 	u64 x = expr(node); \
@@ -258,26 +336,39 @@ static void insert_hole_size(struct rb_root_cached *root,
 static void add_hole(struct drm_mm_node *node)
 {
 	struct drm_mm *mm = node->mm;
+	struct rb_node *rb = &node->rb_hole_addr;
 
 	node->hole_size =
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
+	node->max_hole_size = node->hole_size;
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	insert_hole_size(&mm->holes_size, node);
 	RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
-
 	list_add(&node->hole_stack, &mm->hole_stack);
+
+	if (rb->rb_left)
+		rb = rb->rb_left;
+	else if (rb->rb_right)
+		rb = rb->rb_right;
+
+	update_addr_hole_rbtree(rb);
 }
 
 static void rm_hole(struct drm_mm_node *node)
 {
+	struct rb_node *rb_deepest;
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	list_del(&node->hole_stack);
	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
+
+	rb_deepest = find_deepest(&node->rb_hole_addr);
 	rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
-	node->hole_size = 0;
+	update_addr_hole_rbtree(rb_deepest);
 
+	node->hole_size = 0;
+	node->max_hole_size = 0;
 	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
 }
 
@@ -338,6 +429,46 @@ static struct drm_mm_node *find_hole(struct drm_mm *mm, u64 addr)
 	return node;
 }
 
+/**
+ * next_hole_high_addr - returns next hole for a DRM_MM_INSERT_HIGH mode request
+ * @entry: previously selected drm_mm_node
+ * @size: size of the hole needed for the request
+ *
+ * This function will verify whether left subtree of @entry has hole big enough
+ * to fit the requested size. If so, it will return previous node of @entry or
+ * else it will return parent node of @entry
+ *
+ * It will also skip the complete left subtree if max_hole_size of that subtree
+ * is same as the max_hole_size of the @entry.
+ *
+ * Returns:
+ * previous node of @entry if left subtree of @entry can serve the request or
+ * else return parent of @entry
+ */
+static struct drm_mm_node *
+next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+{
+	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
+	struct drm_mm_node *left_node;
+
+	if (!entry)
+		return false;
+
+	rb_node = &entry->rb_hole_addr;
+	if (rb_node->rb_left) {
+		left_rb_node = rb_node->rb_left;
+		parent_rb_node = rb_parent(rb_node);
+		left_node = rb_entry(left_rb_node,
+				     struct drm_mm_node, rb_hole_addr);
+		if ((left_node->max_hole_size < size ||
+		     entry->size == entry->max_hole_size) &&
+		    parent_rb_node && parent_rb_node->rb_left != rb_node)
+			return rb_hole_addr_to_node(parent_rb_node);
+	}
+
+	return rb_hole_addr_to_node(rb_prev(rb_node));
+}
+
 static struct drm_mm_node *
 first_hole(struct drm_mm *mm,
 	   u64 start, u64 end, u64 size,
@@ -364,6 +495,7 @@ first_hole(struct drm_mm *mm,
 static struct drm_mm_node *
 next_hole(struct drm_mm *mm,
 	  struct drm_mm_node *node,
+	  u64 size,
 	  enum drm_mm_insert_mode mode)
 {
 	switch (mode) {
@@ -375,7 +507,7 @@ next_hole(struct drm_mm *mm,
 		return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));
 
 	case DRM_MM_INSERT_HIGH:
-		return rb_hole_addr_to_node(rb_prev(&node->rb_hole_addr));
+		return next_hole_high_addr(node, size);
 
 	case DRM_MM_INSERT_EVICT:
 		node = list_next_entry(node, hole_stack);
@@ -489,7 +621,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	for (hole = first_hole(mm, range_start, range_end, size, mode);
 	     hole;
-	     hole = once ? NULL : next_hole(mm, hole, mode)) {
+	     hole = once ? NULL : next_hole(mm, hole, size, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ee8b0e80ca90..0ee555ff2d9e 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,6 +168,7 @@ struct drm_mm_node {
 	struct rb_node rb_hole_addr;
 	u64 __subtree_last;
 	u64 hole_size;
+	u64 max_hole_size;
 	unsigned long flags;
 #define DRM_MM_NODE_ALLOCATED_BIT	0
 #define DRM_MM_NODE_SCANNED_BIT	1
--
2.26.1
Quoting Nirmoy Das (2020-04-28 17:04:23)
> Userspace can severely fragment the rb_hole_addr rbtree by manipulating alignment while allocating buffers. A fragmented rb_hole_addr rbtree results in large delays when allocating a buffer object for a userspace application. It takes a long time to find a suitable hole because, if the first attempt fails, we look for neighbouring nodes using rb_prev(). Traversing the rbtree with rb_prev() can take a very long time if the tree is fragmented.
>
> This patch improves searches in a fragmented rb_hole_addr rbtree by storing an extra field, max_hole_size, in drm_mm_node. Each drm_mm_node now stores the maximum hole size of its subtree in drm_mm_node->max_hole_size. Using drm_mm_node->max_hole_size, it is possible to skip a complete subtree if that subtree cannot serve the request, reducing the number of rb_prev() calls.
>
> Note: Implementation details of the rb_hole_addr rbtree updates after node removal and addition are borrowed from block/bfq-wf2q.c, which tries to achieve a similar goal.
Feels like it is an augmented rbtree?
> +static struct drm_mm_node *
> +next_hole_high_addr(struct drm_mm_node *entry, u64 size)
> +{
> +	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
> +	struct drm_mm_node *left_node;
> +
> +	if (!entry)
> +		return false;
> +
> +	rb_node = &entry->rb_hole_addr;
> +	if (rb_node->rb_left) {
> +		left_rb_node = rb_node->rb_left;
> +		parent_rb_node = rb_parent(rb_node);
> +		left_node = rb_entry(left_rb_node,
> +				     struct drm_mm_node, rb_hole_addr);
> +		if ((left_node->max_hole_size < size ||
> +		     entry->size == entry->max_hole_size) &&
> +		    parent_rb_node && parent_rb_node->rb_left != rb_node)
> +			return rb_hole_addr_to_node(parent_rb_node);
> +	}
> +
> +	return rb_hole_addr_to_node(rb_prev(rb_node));
> +}
The max_hole_size would equally apply to next_hole_low_addr(), right?
Sadly, I did not explore the fragmented search LOW/HIGH in test-drm_mm.c. That would be a useful addition as well.
-Chris
On 4/28/20 6:18 PM, Chris Wilson wrote:
> Quoting Nirmoy Das (2020-04-28 17:04:23)
> > Userspace can severely fragment the rb_hole_addr rbtree by manipulating alignment while allocating buffers. A fragmented rb_hole_addr rbtree results in large delays when allocating a buffer object for a userspace application. It takes a long time to find a suitable hole because, if the first attempt fails, we look for neighbouring nodes using rb_prev(). Traversing the rbtree with rb_prev() can take a very long time if the tree is fragmented.
> >
> > This patch improves searches in a fragmented rb_hole_addr rbtree by storing an extra field, max_hole_size, in drm_mm_node. Each drm_mm_node now stores the maximum hole size of its subtree in drm_mm_node->max_hole_size. Using drm_mm_node->max_hole_size, it is possible to skip a complete subtree if that subtree cannot serve the request, reducing the number of rb_prev() calls.
> >
> > Note: Implementation details of the rb_hole_addr rbtree updates after node removal and addition are borrowed from block/bfq-wf2q.c, which tries to achieve a similar goal.
>
> Feels like it is an augmented rbtree?
I just read about augmented rbtrees quickly, and yes, it feels like one. I will try to understand it better and see if I can convert this logic to an augmented rbtree.
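For reference, here is a rough, untested sketch of how this could map onto the kernel's augmented rbtree helpers (RB_DECLARE_CALLBACKS_MAX from <linux/rbtree_augmented.h>), which keep a per-subtree maximum up to date during insert/erase rebalancing. The names insert_hole_addr, augment_callbacks and the HOLE_SIZE macro are placeholders, not part of the posted patch:

	#include <linux/rbtree_augmented.h>

	/* Hole size of a single node; a similar helper may already exist in drm_mm.c. */
	#define HOLE_SIZE(node) ((node)->hole_size)

	/*
	 * Generates the propagate/copy/rotate callbacks that maintain
	 * max_hole_size as the maximum HOLE_SIZE() over each subtree.
	 */
	RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
				 struct drm_mm_node, rb_hole_addr,
				 u64, max_hole_size, HOLE_SIZE)

	static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
	{
		struct rb_node **link = &root->rb_node, *rb_parent = NULL;
		u64 start = HOLE_ADDR(node), max_hole = node->hole_size;
		struct drm_mm_node *parent;

		while (*link) {
			rb_parent = *link;
			parent = rb_entry(rb_parent, struct drm_mm_node, rb_hole_addr);
			/* Propagate the new hole's size on the way down. */
			if (parent->max_hole_size < max_hole)
				parent->max_hole_size = max_hole;
			if (start < HOLE_ADDR(parent))
				link = &parent->rb_hole_addr.rb_left;
			else
				link = &parent->rb_hole_addr.rb_right;
		}

		rb_link_node(&node->rb_hole_addr, rb_parent, link);
		rb_insert_augmented(&node->rb_hole_addr, root, &augment_callbacks);
	}

Removal would then be a plain rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr, &augment_callbacks), and the manual update_addr_hole_rbtree()/find_deepest() walks would no longer be needed.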
> > +static struct drm_mm_node *
> > +next_hole_high_addr(struct drm_mm_node *entry, u64 size)
> > +{
> > +	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
> > +	struct drm_mm_node *left_node;
> > +
> > +	if (!entry)
> > +		return false;
> > +
> > +	rb_node = &entry->rb_hole_addr;
> > +	if (rb_node->rb_left) {
> > +		left_rb_node = rb_node->rb_left;
> > +		parent_rb_node = rb_parent(rb_node);
> > +		left_node = rb_entry(left_rb_node,
> > +				     struct drm_mm_node, rb_hole_addr);
> > +		if ((left_node->max_hole_size < size ||
> > +		     entry->size == entry->max_hole_size) &&
> > +		    parent_rb_node && parent_rb_node->rb_left != rb_node)
> > +			return rb_hole_addr_to_node(parent_rb_node);
> > +	}
> > +
> > +	return rb_hole_addr_to_node(rb_prev(rb_node));
> > +}
> The max_hole_size would equally apply to next_hole_low_addr(), right?
Sorry, yes we need that too. I will add next_hole_low_addr().
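Probably something along these lines — an untested sketch that simply mirrors next_hole_high_addr(): rb_next() walks towards higher addresses, so for DRM_MM_INSERT_LOW it is the right subtree of @entry that can be skipped when its max_hole_size shows it cannot fit the request (the name and the exact condition are placeholders, not final code):

	static struct drm_mm_node *
	next_hole_low_addr(struct drm_mm_node *entry, u64 size)
	{
		struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
		struct drm_mm_node *right_node;

		if (!entry)
			return NULL;

		rb_node = &entry->rb_hole_addr;
		if (rb_node->rb_right) {
			right_rb_node = rb_node->rb_right;
			parent_rb_node = rb_parent(rb_node);
			right_node = rb_entry(right_rb_node,
					      struct drm_mm_node, rb_hole_addr);
			/* Skip the right subtree if it cannot hold the request. */
			if ((right_node->max_hole_size < size ||
			     entry->size == entry->max_hole_size) &&
			    parent_rb_node && parent_rb_node->rb_right != rb_node)
				return rb_hole_addr_to_node(parent_rb_node);
		}

		return rb_hole_addr_to_node(rb_next(rb_node));
	}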
> Sadly, I did not explore the fragmented search LOW/HIGH in test-drm_mm.c. That would be a useful addition as well.
Yes, that would be useful too.
I used ./tools/testing/selftests/drivers/gpu/drm_mm.sh to verify this patch.
> -Chris
Thanks for reviewing,
Nirmoy
Hi Nirmoy,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next drm-tip/drm-tip linus/master v5.7-rc3 next-20200429]
[cannot apply to drm/drm-next]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url:    https://github.com/0day-ci/linux/commits/Nirmoy-Das/drm-mm-optimize-rb_hole_...
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-191-gc51a0382-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
sparse warnings: (new ones prefixed by >>)
>> drivers/gpu/drm/drm_mm.c:455:24: sparse: sparse: Using plain integer as NULL pointer
vim +455 drivers/gpu/drm/drm_mm.c
   431	
   432	/**
   433	 * next_hole_high_addr - returns next hole for a DRM_MM_INSERT_HIGH mode request
   434	 * @entry: previously selected drm_mm_node
   435	 * @size: size of the hole needed for the request
   436	 *
   437	 * This function will verify whether left subtree of @entry has hole big enough
   438	 * to fit the requested size. If so, it will return previous node of @entry or
   439	 * else it will return parent node of @entry
   440	 *
   441	 * It will also skip the complete left subtree if max_hole_size of that subtree
   442	 * is same as the max_hole_size of the @entry.
   443	 *
   444	 * Returns:
   445	 * previous node of @entry if left subtree of @entry can serve the request or
   446	 * else return parent of @entry
   447	 */
   448	static struct drm_mm_node *
   449	next_hole_high_addr(struct drm_mm_node *entry, u64 size)
   450	{
   451		struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
   452		struct drm_mm_node *left_node;
   453	
   454		if (!entry)
 > 455			return false;
   456	
   457		rb_node = &entry->rb_hole_addr;
   458		if (rb_node->rb_left) {
   459			left_rb_node = rb_node->rb_left;
   460			parent_rb_node = rb_parent(rb_node);
   461			left_node = rb_entry(left_rb_node,
   462					     struct drm_mm_node, rb_hole_addr);
   463			if ((left_node->max_hole_size < size ||
   464			     entry->size == entry->max_hole_size) &&
   465			    parent_rb_node && parent_rb_node->rb_left != rb_node)
   466				return rb_hole_addr_to_node(parent_rb_node);
   467		}
   468	
   469		return rb_hole_addr_to_node(rb_prev(rb_node));
   470	}
   471	
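(Editorial note: the warning is about returning the boolean false from a function that returns a struct drm_mm_node pointer; the straightforward fix, not part of the posted patch, would be to return NULL instead:)

	-	if (!entry)
	-		return false;
	+	if (!entry)
	+		return NULL;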
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org