Re: [PATCH 1/4] drm/ttm: add a pointer to the allocating BO into ttm_resource

11 Jun 2021

Hi, Christian,
I know you have a lot on your plate, and that the drm community is a bit 
lax about following the kernel patch submission guidelines, but now that 
we're also spinning up a number of Intel developers on TTM, could we 
please make a better effort with cover letters and commit messages so 
that they understand what the purpose and end goal of the series is? A 
reviewer shouldn't have to look at the last patch to try to get an 
understanding of what the series is doing and why.
On 6/10/21 1:05 PM, Christian König wrote:
...
We are going to need this for the next patch
...
and it allows us to clean
up amdgpu as well.
The amdgpu changes are not reflected in the commit title.
...
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 47 ++++++++-------------
  drivers/gpu/drm/ttm/ttm_resource.c          |  1 +
  include/drm/ttm/ttm_resource.h              |  1 +
  3 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 194f9eecf89c..8e3f5da44e4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,23 +26,12 @@
#include "amdgpu.h"
-struct amdgpu_gtt_node {

struct ttm_buffer_object *tbo;
struct ttm_range_mgr_node base;

-};

static inline struct amdgpu_gtt_mgr *
to_gtt_mgr(struct ttm_resource_manager *man)
{
 return container_of(man, struct amdgpu_gtt_mgr, manager);
}

-static inline struct amdgpu_gtt_node *
-to_amdgpu_gtt_node(struct ttm_resource *res)
-{

return container_of(res, struct amdgpu_gtt_node, base.base);

-}

/**
DOC: mem_info_gtt_total




@@ -107,9 +96,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
   */
  bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
  {

struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);


struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);


return drm_mm_node_allocated(&node->base.mm_nodes[0]);


return drm_mm_node_allocated(&node->mm_nodes[0]);
}
/**


@@ -129,7 +118,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  {
   struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
   uint32_t num_pages = PFN_UP(tbo->base.size);

struct amdgpu_gtt_node *node;


struct ttm_range_mgr_node *node;
int r;
spin_lock(&mgr->lock);


@@ -141,19 +130,17 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
   atomic64_sub(num_pages, &mgr->available);
   spin_unlock(&mgr->lock);

node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);


node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
if (!node) {
r = -ENOMEM;
goto err_out;
}


node->tbo = tbo;
ttm_resource_init(tbo, place, &node->base.base);



ttm_resource_init(tbo, place, &node->base);
if (place->lpfn) {
spin_lock(&mgr->lock);
r = drm_mm_insert_node_in_range(&mgr->mm,


				&node->base.mm_nodes[0],




				&node->mm_nodes[0],
			num_pages, tbo->page_alignment,
			0, place->fpfn, place->lpfn,
			DRM_MM_INSERT_BEST);



@@ -161,14 +148,14 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
   	if (unlikely(r))
   		goto err_free;

node->base.base.start = node->base.mm_nodes[0].start;




node->base.start = node->mm_nodes[0].start;

} else {


node->base.mm_nodes[0].start = 0;


node->base.mm_nodes[0].size = node->base.base.num_pages;


node->base.base.start = AMDGPU_BO_INVALID_OFFSET;




node->mm_nodes[0].start = 0;


node->mm_nodes[0].size = node->base.num_pages;


node->base.start = AMDGPU_BO_INVALID_OFFSET;

}


*res = &node->base.base;


*res = &node->base;
return 0;
err_free:


@@ -191,12 +178,12 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
   		       struct ttm_resource *res)
  {

struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);


struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
spin_lock(&mgr->lock);



if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
drm_mm_remove_node(&node->base.mm_nodes[0]);




if (drm_mm_node_allocated(&node->mm_nodes[0]))
drm_mm_remove_node(&node->mm_nodes[0]);

spin_unlock(&mgr->lock);
atomic64_add(res->num_pages, &mgr->available);

@@ -228,14 +215,14 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
   struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);

struct amdgpu_gtt_node *node;


struct ttm_range_mgr_node *node;
struct drm_mm_node *mm_node;
int r = 0;
spin_lock(&mgr->lock);
drm_mm_for_each_node(mm_node, &mgr->mm) {



node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);


r = amdgpu_ttm_recover_gart(node->tbo);




node = container_of(mm_node, typeof(*node), mm_nodes[0]);


r = amdgpu_ttm_recover_gart(node->base.bo);

if (r)
	break;
}

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 2431717376e7..7ff6194154fe 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -41,6 +41,7 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
   res->bus.offset = 0;
   res->bus.is_iomem = false;
   res->bus.caching = ttm_cached;

res->bo = bo;
}
EXPORT_SYMBOL(ttm_resource_init);

diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 140b6b9a8bbe..6d0b7a6d2169 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -171,6 +171,7 @@ struct ttm_resource {
   uint32_t mem_type;
   uint32_t placement;
   struct ttm_bus_placement bus;

struct ttm_buffer_object *bo;

Not that I'm against this change by itself, but this bo pointer is not 
refcounted, and therefore needs a description of when it's needed and why. 
What happens, for example, when the resource is moved to a ghost object, 
or the bo is killed while the resource remains on an LRU list (which 
I understand was one of the main purposes of free-standing resources)? 
Weak references need a guarantee that the object they point to is 
alive. What is that guarantee?
Also, could we introduce new TTM structure members where they are first 
used/referenced by TTM and not where they are used by amdgpu? Without 
finding out in patch 3 that this member is needed to look up the bo from 
an LRU list, the correct response to this patch would have been: That bo 
is amdgpu-specific and needs to be in a driver private struct...
Thanks,
/Thomas
...
};
/**

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH 1/4] drm/ttm: add a pointer to the allocating BO into ttm_resource

Signed-off-by: Christian König christian.koenig@amd.com