Re: [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2

11 Jun 2021

Am 11.06.21 um 16:56 schrieb Daniel Vetter:
...
On Fri, Jun 11, 2021 at 02:03:01PM +0200, Christian König wrote:
...
Drop the workaround and instead implement a better solution.
Basically we are now chaining all submissions using a dma_fence_chain
container and adding them as exclusive fence to the dma_resv object.
This way other drivers can still sync to the single exclusive fence
while amdgpu only sync to fences from different processes.
v2: polish kerneldoc a bit
There is no kerneldoc here, and otherwise this patch looks unchanged from
the previous round. Hit send a bit too quickly?
Ah, there it went!
It's just to hot here, that was meant to be added to the other patch and 
I was really wondering why it wasn't there!
Going to call it a day or rather week and will look at that again on Monday.
Christian.
...
-Daniel
...
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 54 +++++++++++++----
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
  6 files changed, 47 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..c905a4cfc173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
  struct amdgpu_bo_list_entry {
   struct ttm_validate_buffer	tv;
   struct amdgpu_bo_va		*bo_va;

struct dma_fence_chain		*chain;
uint32_t			priority;
struct page			**user_pages;
bool				user_invalidated;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 325e82621467..f6f3029f958d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -587,6 +587,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
   	goto out;
   }

amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);



e->bo_va = amdgpu_vm_bo_find(vm, bo);



if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {


	e->chain = dma_fence_chain_alloc();


	if (!e->chain) {


		r = -ENOMEM;


		goto error_validate;


	}


}


}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
  			  &p->bytes_moved_vis_threshold);
 p->bytes_moved = 0;

@@ -614,15 +628,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
   gws = p->bo_list->gws_obj;
   oa = p->bo_list->oa_obj;

amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);



/* Make sure we use the exclusive slot for shared BOs */


if (bo->prime_shared_count)


	e->tv.num_shared = 0;


e->bo_va = amdgpu_vm_bo_find(vm, bo);


}
if (gds) {
  p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
  p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;

@@ -644,8 +649,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
   }
error_validate:

if (r)


if (r) {
amdgpu_bo_list_for_each_entry(e, p->bo_list) {


	dma_fence_chain_free(e->chain);


	e->chain = NULL;


}

ttm_eu_backoff_reservation(&p->ticket, &p->validated);
}
out:
return r;
}

@@ -685,9 +695,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
  {
   unsigned i;

if (error && backoff)


if (error && backoff) {

struct amdgpu_bo_list_entry *e;



amdgpu_bo_list_for_each_entry(e, parser->bo_list) {


	dma_fence_chain_free(e->chain);


	e->chain = NULL;


}


ttm_eu_backoff_reservation(&parser->ticket,
  			   &parser->validated);

}
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);


@@ -1260,6 +1278,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct dma_resv *resv = e->tv.bo->base.resv;


struct dma_fence_chain *chain = e->chain;



if (!chain)


	continue;



dma_fence_chain_init(chain, dma_resv_excl_fence(resv),


		     dma_fence_get(p->fence), 1);



rcu_assign_pointer(resv->fence_excl, &chain->base);


e->chain = NULL;


}
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 mutex_unlock(&p->adev->notifier_lock);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c3053b83b80c..23219fc3b28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
  #include <linux/pci-p2pdma.h>
  #include <linux/pm_runtime.h>
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{

struct dma_fence **fences;
unsigned int count;
int r;

if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
return 0;



r = dma_resv_get_fences(obj, NULL, &count, &fences);
if (r)
return r;



if (count == 0) {
/* Now that was unexpected. */


} else if (count == 1) {
dma_resv_add_excl_fence(obj, fences[0]);


dma_fence_put(fences[0]);


kfree(fences);


} else {
struct dma_fence_array *array;



array = dma_fence_array_create(count, fences,


			       dma_fence_context_alloc(1), 0,


			       false);


if (!array)


	goto err_fences_put;



dma_resv_add_excl_fence(obj, &array->base);


dma_fence_put(&array->base);


}

return 0;


-err_fences_put:

while (count--)
dma_fence_put(fences[count]);


kfree(fences);
return -ENOMEM;

-}

/**
amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation




@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
   if (r < 0)
   	goto out;

r = amdgpu_bo_reserve(bo, false);

if (unlikely(r != 0))

goto out;



/*

* We only create shared fences for internal use, but importers


* of the dmabuf rely on exclusive fences for implicitly


* tracking write hazards. As any of the current fences may


* correspond to a write, we need to convert all existing


* fences on the reservation object into a single exclusive


* fence.


*/


r = __dma_resv_make_exclusive(bo->tbo.base.resv);

if (r)

goto out;



bo->prime_shared_count++;

amdgpu_bo_unreserve(bo);
return 0;
out:


@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;


pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}

@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
   bo = gem_to_amdgpu_bo(gobj);
   bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
   bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;

if (dma_buf->ops != &amdgpu_dmabuf_ops)

bo->prime_shared_count = 1;


dma_resv_unlock(resv);
return gobj;


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1c3e3b608332..5d82120fc160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
   	break;
   }
   case AMDGPU_GEM_OP_SET_PLACEMENT:

if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {




if (robj->tbo.base.import_attach &&


    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
amdgpu_bo_unreserve(robj);
break;



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 96447e1d4c9c..30951b593809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -871,7 +871,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
   	return -EINVAL;
/* A shared bo cannot be migrated to VRAM */

if (bo->prime_shared_count || bo->tbo.base.import_attach) {


if (bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
	domain = AMDGPU_GEM_DOMAIN_GTT;
else

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index b35962702278..55d7e90eaa72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -98,7 +98,6 @@ struct amdgpu_bo {
   struct ttm_buffer_object	tbo;
   struct ttm_bo_kmap_obj		kmap;
   u64				flags;

unsigned			prime_shared_count;
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base	*vm_bo;
/* Constant after initialization */

-- 
2.25.1

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2

Signed-off-by: Christian König christian.koenig@amd.com