So this is finally it. After all the work put into fence support, cross-device synchronization is now possible. :-)
The last 2 patches of this series are not needed for cross-device synchronization to work, but without them any wait on a cross-device fence is done synchronously. I've previously tested this with i915, but the i915 patches no longer apply after the execlist changes, so I haven't tried it against the latest drm-next.
I would like to have the first 2 patches applied to drm-next, and the radeon/nouveau-specific patches once they have gone through review.
Pass the dma_buf_attachment to the gem_prime_import_sg_table hook instead of only the size; this allows importing reservation_objects from a dma-buf.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/drm_gem_cma_helper.c    | 5 +++--
 drivers/gpu/drm/drm_prime.c             | 2 +-
 drivers/gpu/drm/msm/msm_drv.h           | 2 +-
 drivers/gpu/drm/msm/msm_gem_prime.c     | 4 ++--
 drivers/gpu/drm/nouveau/nouveau_gem.h   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_prime.c | 5 +++--
 drivers/gpu/drm/qxl/qxl_drv.h           | 2 +-
 drivers/gpu/drm/qxl/qxl_prime.c         | 2 +-
 drivers/gpu/drm/radeon/radeon_drv.c     | 2 +-
 drivers/gpu/drm/radeon/radeon_prime.c   | 5 +++--
 include/drm/drmP.h                      | 3 ++-
 include/drm/drm_gem_cma_helper.h        | 3 ++-
 12 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index e467e67af6e7..08646af2ddc2 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -316,7 +316,8 @@ out: EXPORT_SYMBOL_GPL(drm_gem_cma_prime_get_sg_table);
 struct drm_gem_object *
-drm_gem_cma_prime_import_sg_table(struct drm_device *dev, size_t size,
+drm_gem_cma_prime_import_sg_table(struct drm_device *dev,
+				  struct dma_buf_attachment *attach,
 				  struct sg_table *sgt)
 {
 	struct drm_gem_cma_object *cma_obj;
@@ -325,7 +326,7 @@ drm_gem_cma_prime_import_sg_table(struct drm_device *dev, size_t size,
 		return ERR_PTR(-EINVAL);

 	/* Create a CMA GEM buffer. */
-	cma_obj = __drm_gem_cma_create(dev, size);
+	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size);
 	if (IS_ERR(cma_obj))
 		return ERR_CAST(cma_obj);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 99d578bad17e..dc4711f30382 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -522,7 +522,7 @@ struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, goto fail_detach; }
- obj = dev->driver->gem_prime_import_sg_table(dev, dma_buf->size, sgt); + obj = dev->driver->gem_prime_import_sg_table(dev, attach, sgt); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto fail_unmap; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 8a2c5fd0893e..a0dc2592ffc1 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -170,7 +170,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); void *msm_gem_prime_vmap(struct drm_gem_object *obj); void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, - size_t size, struct sg_table *sg); + struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); void *msm_gem_vaddr_locked(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index d48f9fc5129b..b75f9940ee9e 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -37,9 +37,9 @@ void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) }
struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, - size_t size, struct sg_table *sg) + struct dma_buf_attachment *attach, struct sg_table *sg) { - return msm_gem_import(dev, size, sg); + return msm_gem_import(dev, attach->dmabuf->size, sg); }
int msm_gem_prime_pin(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h index ddab762d81fe..e4049faca780 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.h +++ b/drivers/gpu/drm/nouveau/nouveau_gem.h @@ -39,7 +39,7 @@ struct reservation_object *nouveau_gem_prime_res_obj(struct drm_gem_object *); extern void nouveau_gem_prime_unpin(struct drm_gem_object *); extern struct sg_table *nouveau_gem_prime_get_sg_table(struct drm_gem_object *); extern struct drm_gem_object *nouveau_gem_prime_import_sg_table( - struct drm_device *, size_t size, struct sg_table *); + struct drm_device *, struct dma_buf_attachment *, struct sg_table *); extern void *nouveau_gem_prime_vmap(struct drm_gem_object *); extern void nouveau_gem_prime_vunmap(struct drm_gem_object *, void *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 1f51008e4d26..2215cdba587d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -23,6 +23,7 @@ */
#include <drm/drmP.h> +#include <linux/dma-buf.h>
#include "nouveau_drm.h" #include "nouveau_gem.h" @@ -56,7 +57,7 @@ void nouveau_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) }
struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, - size_t size, + struct dma_buf_attachment *attach, struct sg_table *sg) { struct nouveau_bo *nvbo; @@ -65,7 +66,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
flags = TTM_PL_FLAG_TT;
- ret = nouveau_bo_new(dev, size, 0, flags, 0, 0, + ret = nouveau_bo_new(dev, attach->dmabuf->size, 0, flags, 0, 0, sg, &nvbo); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index d75c0a9f674f..0eb97500dd5f 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -535,7 +535,7 @@ int qxl_gem_prime_pin(struct drm_gem_object *obj); void qxl_gem_prime_unpin(struct drm_gem_object *obj); struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *qxl_gem_prime_import_sg_table( - struct drm_device *dev, size_t size, + struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); void *qxl_gem_prime_vmap(struct drm_gem_object *obj); void qxl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index ba0689c728e8..3d031b50a8fd 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -46,7 +46,7 @@ struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj) }
struct drm_gem_object *qxl_gem_prime_import_sg_table( - struct drm_device *dev, size_t size, + struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *table) { WARN_ONCE(1, "not implemented"); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f1e96e094b00..911d9bc50903 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -133,7 +133,7 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, - size_t size, + struct dma_buf_attachment *, struct sg_table *sg); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index d5414d42e44b..171daf7fc483 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -27,6 +27,7 @@
#include "radeon.h" #include <drm/radeon_drm.h> +#include <linux/dma-buf.h>
struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj) { @@ -57,14 +58,14 @@ void radeon_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) }
struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, - size_t size, + struct dma_buf_attachment *attach, struct sg_table *sg) { struct radeon_device *rdev = dev->dev_private; struct radeon_bo *bo; int ret;
- ret = radeon_bo_create(rdev, size, PAGE_SIZE, false, + ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false, RADEON_GEM_DOMAIN_GTT, 0, sg, &bo); if (ret) return ERR_PTR(ret); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6b4fbf3a3263..31ef4c263c51 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -888,7 +888,8 @@ struct drm_driver { struct drm_gem_object *obj); struct sg_table *(*gem_prime_get_sg_table)(struct drm_gem_object *obj); struct drm_gem_object *(*gem_prime_import_sg_table)( - struct drm_device *dev, size_t size, + struct drm_device *dev, + struct dma_buf_attachment *attach, struct sg_table *sgt); void *(*gem_prime_vmap)(struct drm_gem_object *obj); void (*gem_prime_vunmap)(struct drm_gem_object *obj, void *vaddr); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 2a3cea91606d..5096807c1af0 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -44,7 +44,8 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *obj, struct seq_file *m);
struct sg_table *drm_gem_cma_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object * -drm_gem_cma_prime_import_sg_table(struct drm_device *dev, size_t size, +drm_gem_cma_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, struct sg_table *sgt); int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
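To illustrate the new hook signature: a driver's import callback now receives the whole attachment, so both the dma-buf's size and (with the later patches in this series) its reservation_object are within reach. This is only a rough sketch; the foo_* names are made up and not part of this series:

static struct drm_gem_object *
foo_gem_prime_import_sg_table(struct drm_device *dev,
			      struct dma_buf_attachment *attach,
			      struct sg_table *sg)
{
	/* needs <linux/dma-buf.h> for struct dma_buf_attachment */
	size_t size = attach->dmabuf->size;	/* was the old size argument */
	struct reservation_object *resv = attach->dmabuf->resv;

	/* hypothetical driver-internal helper */
	return foo_gem_import(dev, size, sg, resv);
}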
Add a reservation_object argument to ttm_bo_init; this allows a TTM buffer object to be created around a reservation object imported from a dma-buf.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ast/ast_ttm.c            |  2 +-
 drivers/gpu/drm/bochs/bochs_mm.c         |  2 +-
 drivers/gpu/drm/cirrus/cirrus_ttm.c      |  2 +-
 drivers/gpu/drm/mgag200/mgag200_ttm.c    |  2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c     |  2 +-
 drivers/gpu/drm/qxl/qxl_object.c         |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c   |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c             | 24 ++++++++++++++++++------
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |  2 +-
 include/drm/ttm/ttm_bo_api.h             |  2 ++
 10 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 8008ea0bc76c..58c19cfe6af0 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -339,7 +339,7 @@ int ast_bo_create(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&ast->ttm.bdev, &astbo->bo, size, ttm_bo_type_device, &astbo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, - NULL, ast_bo_ttm_destroy); + NULL, NULL, ast_bo_ttm_destroy); if (ret) return ret;
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 2af30e7607d7..6c50a7a44864 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -377,7 +377,7 @@ static int bochs_bo_create(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&bochs->ttm.bdev, &bochsbo->bo, size, ttm_bo_type_device, &bochsbo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, - NULL, bochs_bo_ttm_destroy); + NULL, NULL, bochs_bo_ttm_destroy); if (ret) return ret;
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 3e7d758330a9..b3b3d16d1279 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -343,7 +343,7 @@ int cirrus_bo_create(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&cirrus->ttm.bdev, &cirrusbo->bo, size, ttm_bo_type_device, &cirrusbo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, - NULL, cirrus_bo_ttm_destroy); + NULL, NULL, cirrus_bo_ttm_destroy); if (ret) return ret;
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index be883ef5a1d3..398b6fb161a6 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -339,7 +339,7 @@ int mgag200_bo_create(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&mdev->ttm.bdev, &mgabo->bo, size, ttm_bo_type_device, &mgabo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, - NULL, mgag200_bo_ttm_destroy); + NULL, NULL, mgag200_bo_ttm_destroy); if (ret) return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index eea74b127b03..bda32276bcc2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -230,7 +230,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&drm->ttm.bdev, &nvbo->bo, size, type, &nvbo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, sg, - nouveau_bo_del_ttm); + NULL, nouveau_bo_del_ttm); if (ret) { /* ttm will call nouveau_bo_del_ttm if it fails.. */ return ret; diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 69c104c3240f..cdeaf08fdc74 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -110,7 +110,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 	r = ttm_bo_init(&qdev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, 0, !kernel, NULL, size,
-			NULL, &qxl_ttm_bo_destroy);
+			NULL, NULL, &qxl_ttm_bo_destroy);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			dev_err(qdev->dev,
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index aadbd36e64b9..61f3f16bbcbc 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -209,7 +209,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, page_align, !kernel, NULL,
-			acc_size, sg, &radeon_ttm_bo_destroy);
+			acc_size, sg, NULL, &radeon_ttm_bo_destroy);
 	up_read(&rdev->pm.mclk_lock);
 	if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a11969acfea5..8f5cec67c47d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1068,6 +1068,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 		struct file *persistent_swap_storage,
 		size_t acc_size,
 		struct sg_table *sg,
+		struct reservation_object *resv,
 		void (*destroy) (struct ttm_buffer_object *))
 {
 	int ret = 0;
@@ -1121,8 +1122,13 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->persistent_swap_storage = persistent_swap_storage;
 	bo->acc_size = acc_size;
 	bo->sg = sg;
-	bo->resv = &bo->ttm_resv;
-	reservation_object_init(bo->resv);
+	if (resv) {
+		bo->resv = resv;
+		lockdep_assert_held(&bo->resv->lock.base);
+	} else {
+		bo->resv = &bo->ttm_resv;
+		reservation_object_init(&bo->ttm_resv);
+	}
 	atomic_inc(&bo->glob->bo_count);
 	drm_vma_node_reset(&bo->vma_node);
@@ -1135,13 +1141,19 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node,
 				 bo->mem.num_pages);

-	locked = ww_mutex_trylock(&bo->resv->lock);
-	WARN_ON(!locked);
+	/* passed reservation objects should already be locked,
+	 * since otherwise lockdep will be angered in radeon.
+	 */
+	if (!resv) {
+		locked = ww_mutex_trylock(&bo->resv->lock);
+		WARN_ON(!locked);
+	}

 	if (likely(!ret))
 		ret = ttm_bo_validate(bo, placement, interruptible, false);

-	ttm_bo_unreserve(bo);
+	if (!resv)
+		ttm_bo_unreserve(bo);

 	if (unlikely(ret))
 		ttm_bo_unref(&bo);
@@ -1199,7 +1211,7 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
 	acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object));
 	ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment,
 			  interruptible, persistent_swap_storage, acc_size,
-			  NULL, NULL);
+			  NULL, NULL, NULL);
 	if (likely(ret == 0))
 		*p_bo = bo;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index ff0e03b97753..4098b2aa5588 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -429,7 +429,7 @@ int vmw_dmabuf_init(struct vmw_private *dev_priv, ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, 0, interruptible, - NULL, acc_size, NULL, bo_free); + NULL, acc_size, NULL, NULL, bo_free); return ret; }
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 70b44917c368..0ccf7f267ff9 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -460,6 +460,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, * point to the shmem object backing a GEM object if TTM is used to back a * GEM user interface. * @acc_size: Accounted size for this object. + * @resv: Pointer to a reservation_object, or NULL to let ttm allocate one. * @destroy: Destroy function. Use NULL for kfree(). * * This function initializes a pre-allocated struct ttm_buffer_object. @@ -487,6 +488,7 @@ extern int ttm_bo_init(struct ttm_bo_device *bdev, struct file *persistent_swap_storage, size_t acc_size, struct sg_table *sg, + struct reservation_object *resv, void (*destroy) (struct ttm_buffer_object *));
/**
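The new contract in one picture: when a reservation object is passed in, ttm_bo_init neither initializes nor trylocks it, so the caller must hold its ww_mutex across the call and drop it afterwards. A minimal sketch, assuming bdev, bo, placement, acc_size, sg and my_bo_destroy are set up the usual way for the driver (those names are placeholders, not from this series):

	struct reservation_object *resv = attach->dmabuf->resv;
	int ret;

	ww_mutex_lock(&resv->lock, NULL);	/* ttm_bo_init asserts this is held */
	ret = ttm_bo_init(bdev, bo, attach->dmabuf->size, ttm_bo_type_sg,
			  placement, 0, false, NULL, acc_size, sg,
			  resv,	/* shared with the exporter, not &bo->ttm_resv */
			  my_bo_destroy);
	ww_mutex_unlock(&resv->lock);

This is exactly the pattern the radeon and nouveau prime patches later in this series follow.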
Not the whole world is a radeon! :-)
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/radeon/radeon.h         | 11 ---------
 drivers/gpu/drm/radeon/radeon_cs.c      | 32 +++++++++++++++++++++----
 drivers/gpu/drm/radeon/radeon_display.c | 41 ++++++++++++++++++++++++---------
 drivers/gpu/drm/radeon/radeon_fence.c   |  3 +++
 drivers/gpu/drm/radeon/radeon_mode.h    |  1 +
 5 files changed, 61 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d80dc547a105..dddb2b7dd752 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -699,17 +699,6 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell); * IRQS. */
-struct radeon_flip_work { - struct work_struct flip_work; - struct work_struct unpin_work; - struct radeon_device *rdev; - int crtc_id; - uint64_t base; - struct drm_pending_vblank_event *event; - struct radeon_bo *old_rbo; - struct radeon_fence *fence; -}; - struct r500_irq_stat_regs { u32 disp_int; u32 hdmi0_status; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 6e3d1c8f3483..8ad4e2cfae15 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -248,23 +248,34 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; }
-static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
+static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
 	int i;

 	for (i = 0; i < p->nrelocs; i++) {
 		struct reservation_object *resv;
 		struct fence *fence;
+		struct radeon_fence *rfence;
+		int r;

 		if (!p->relocs[i].robj)
 			continue;

 		resv = p->relocs[i].robj->tbo.resv;
 		fence = reservation_object_get_excl(resv);
+		if (!fence)
+			continue;
+		rfence = to_radeon_fence(fence);
+		if (!rfence || rfence->rdev != p->rdev) {
+			r = fence_wait(fence, true);
+			if (r)
+				return r;
+			continue;
+		}

-		radeon_semaphore_sync_to(p->ib.semaphore,
-					 (struct radeon_fence *)fence);
+		radeon_semaphore_sync_to(p->ib.semaphore, rfence);
 	}
+	return 0;
 }
/* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -474,13 +485,19 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; }
+ r = radeon_cs_sync_rings(parser); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to sync rings: %i\n", r); + return r; + } + if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || (parser->ring == TN_RING_TYPE_VCE2_INDEX)) radeon_vce_note_usage(rdev);
- radeon_cs_sync_rings(parser); r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); if (r) { DRM_ERROR("Failed to schedule IB !\n"); @@ -567,7 +584,12 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - radeon_cs_sync_rings(parser); + r = radeon_cs_sync_rings(parser); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to sync rings: %i\n", r); + goto out; + } radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
if ((rdev->family >= CHIP_TAHITI) && diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bc894c17b2f9..715b2d95346c 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -36,6 +36,17 @@
#include <linux/gcd.h>
+struct radeon_flip_work { + struct work_struct flip_work; + struct work_struct unpin_work; + struct radeon_device *rdev; + int crtc_id; + uint64_t base; + struct drm_pending_vblank_event *event; + struct radeon_bo *old_rbo; + struct fence *fence; +}; + static void avivo_crtc_load_lut(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); @@ -402,14 +413,21 @@ static void radeon_flip_work_func(struct work_struct *__work)
down_read(&rdev->exclusive_lock); if (work->fence) { - r = radeon_fence_wait(work->fence, false); - if (r == -EDEADLK) { - up_read(&rdev->exclusive_lock); - do { - r = radeon_gpu_reset(rdev); - } while (r == -EAGAIN); - down_read(&rdev->exclusive_lock); - } + struct radeon_fence *fence; + + fence = to_radeon_fence(work->fence); + if (fence && fence->rdev == rdev) { + r = radeon_fence_wait(fence, false); + if (r == -EDEADLK) { + up_read(&rdev->exclusive_lock); + do { + r = radeon_gpu_reset(rdev); + } while (r == -EAGAIN); + down_read(&rdev->exclusive_lock); + } + } else + r = fence_wait(fence, false); + if (r) DRM_ERROR("failed to wait on page flip fence (%d)!\n", r);
@@ -418,7 +436,8 @@ static void radeon_flip_work_func(struct work_struct *__work) * confused about which BO the CRTC is scanning out */
- radeon_fence_unref(&work->fence); + fence_put(work->fence); + work->fence = NULL; }
/* We borrow the event spin lock for protecting flip_status */ @@ -494,7 +513,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - work->fence = (struct radeon_fence *)fence_get(reservation_object_get_excl(new_rbo->tbo.resv)); + work->fence = fence_get(reservation_object_get_excl(new_rbo->tbo.resv)); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo);
@@ -576,7 +595,7 @@ pflip_cleanup:
cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - radeon_fence_unref(&work->fence); + fence_put(work->fence); kfree(work); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index af9f2d6bd7d0..0262fe2580d2 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -541,6 +541,9 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) uint64_t seq[RADEON_NUM_RINGS] = {}; long r;
+ if (unlikely(!to_radeon_fence(&fence->base))) + return fence_wait(&fence->base, intr); + seq[fence->ring] = fence->seq; r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); if (r < 0) { diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e27608c29c11..f6e9ee573de2 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -40,6 +40,7 @@
struct radeon_bo; struct radeon_device; +struct radeon_flip_work;
#define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base) #define to_radeon_connector(x) container_of(x, struct radeon_connector, base)
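Both the CS and the page-flip paths above now follow the same dispatch rule; spelled out on its own for readability (resv, rdev, sem and r stand in for the respective context, and error handling is trimmed):

	struct fence *f = reservation_object_get_excl(resv);
	struct radeon_fence *rf = f ? to_radeon_fence(f) : NULL;

	if (!f) {
		/* nothing to sync against */
	} else if (rf && rf->rdev == rdev) {
		/* one of ours: wait GPU-side via semaphores/radeon_fence_wait */
		radeon_semaphore_sync_to(sem, rf);
	} else {
		/* foreign fence: fall back to a CPU wait for now
		 * (interruptible in the CS path, not in the flip worker) */
		r = fence_wait(f, true);
	}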
On 04.09.2014 13:40, Maarten Lankhorst wrote:
> Not the whole world is a radeon! :-)
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
>  drivers/gpu/drm/radeon/radeon.h         | 11 ---------
>  drivers/gpu/drm/radeon/radeon_cs.c      | 32 +++++++++++++++++++++----
>  drivers/gpu/drm/radeon/radeon_display.c | 41 ++++++++++++++++++++++++---------
>  drivers/gpu/drm/radeon/radeon_fence.c   |  3 +++
>  drivers/gpu/drm/radeon/radeon_mode.h    |  1 +
>  5 files changed, 61 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index d80dc547a105..dddb2b7dd752 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -699,17 +699,6 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell);
>   * IRQS.
>   */
>
> -struct radeon_flip_work {
> -	struct work_struct flip_work;
> -	struct work_struct unpin_work;
> -	struct radeon_device *rdev;
> -	int crtc_id;
> -	uint64_t base;
> -	struct drm_pending_vblank_event *event;
> -	struct radeon_bo *old_rbo;
> -	struct radeon_fence *fence;
> -};
Please keep this structure where it was; apart from that, the patch looks good at first glance.
Christian.
Add an extra reservation_object argument to radeon_bo_create; it is used in radeon_prime.c to share the reservation object of an imported dma-buf.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/radeon/cik.c              | 4 ++--
 drivers/gpu/drm/radeon/evergreen.c        | 6 +++---
 drivers/gpu/drm/radeon/r600.c             | 4 ++--
 drivers/gpu/drm/radeon/radeon_benchmark.c | 4 ++--
 drivers/gpu/drm/radeon/radeon_device.c    | 2 +-
 drivers/gpu/drm/radeon/radeon_gart.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_gem.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_object.c    | 8 +++++---
 drivers/gpu/drm/radeon/radeon_object.h    | 1 +
 drivers/gpu/drm/radeon/radeon_prime.c     | 5 ++++-
 drivers/gpu/drm/radeon/radeon_ring.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_sa.c        | 2 +-
 drivers/gpu/drm/radeon/radeon_test.c      | 5 +++--
 drivers/gpu/drm/radeon/radeon_ttm.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_uvd.c       | 3 ++-
 drivers/gpu/drm/radeon/radeon_vce.c       | 3 ++-
 drivers/gpu/drm/radeon/radeon_vm.c        | 5 +++--
 17 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 1f598ab3b9a7..d984de903928 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4689,7 +4689,7 @@ static int cik_mec_init(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &rdev->mec.hpd_eop_obj); if (r) { dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -4860,7 +4860,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) sizeof(struct bonaire_mqd), PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, 0, NULL, - &rdev->ring[idx].mqd_obj); + NULL, &rdev->ring[idx].mqd_obj); if (r) { dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); return r; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index dbca60c7d097..c6ccef6c3596 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4023,7 +4023,7 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.save_restore_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, - &rdev->rlc.save_restore_obj); + NULL, &rdev->rlc.save_restore_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); return r; @@ -4102,7 +4102,7 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.clear_state_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, - &rdev->rlc.clear_state_obj); + NULL, &rdev->rlc.clear_state_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); sumo_rlc_fini(rdev); @@ -4179,7 +4179,7 @@ int sumo_rlc_init(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, - &rdev->rlc.cp_table_obj); + NULL, &rdev->rlc.cp_table_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r); sumo_rlc_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a95ced569d84..94e82c6b03ca 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1430,7 +1430,7 @@ int r600_vram_scratch_init(struct radeon_device *rdev) if (rdev->vram_scratch.robj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, &rdev->vram_scratch.robj); + 0, NULL, NULL, &rdev->vram_scratch.robj); if (r) { return r; } @@ -3377,7 +3377,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->ih.ring_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, 0, - NULL, &rdev->ih.ring_obj); + NULL, NULL, &rdev->ih.ring_obj); if (r) { DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 69f5695bdab9..49da0b0a2d35 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -97,7 +97,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, int time;
n = RADEON_BENCHMARK_ITERATIONS; - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, &sobj); + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, NULL, &sobj); if (r) { goto out_cleanup; } @@ -109,7 +109,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, if (r) { goto out_cleanup; } - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, &dobj); + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e84a76e6656a..6fbab1582112 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -430,7 +430,7 @@ int radeon_wb_init(struct radeon_device *rdev)
if (rdev->wb.wb_obj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &rdev->wb.wb_obj); if (r) { dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a053a0779aac..84146d5901aa 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -128,7 +128,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) if (rdev->gart.robj == NULL) { r = radeon_bo_create(rdev, rdev->gart.table_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, &rdev->gart.robj); + 0, NULL, NULL, &rdev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 4b7c8ec36c2f..c194497aa586 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -67,7 +67,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
retry: r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain, - flags, NULL, &robj); + flags, NULL, NULL, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == RADEON_GEM_DOMAIN_VRAM) { diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 61f3f16bbcbc..67997f329c7b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -160,8 +160,10 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) }
int radeon_bo_create(struct radeon_device *rdev, - unsigned long size, int byte_align, bool kernel, u32 domain, - u32 flags, struct sg_table *sg, struct radeon_bo **bo_ptr) + unsigned long size, int byte_align, bool kernel, + u32 domain, u32 flags, struct sg_table *sg, + struct reservation_object *robj, + struct radeon_bo **bo_ptr) { struct radeon_bo *bo; enum ttm_bo_type type; @@ -209,7 +211,7 @@ int radeon_bo_create(struct radeon_device *rdev, down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, - acc_size, sg, NULL, &radeon_ttm_bo_destroy); + acc_size, sg, robj, &radeon_ttm_bo_destroy); up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 98a47fdf3625..fb6ec9ba97fa 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -126,6 +126,7 @@ extern int radeon_bo_create(struct radeon_device *rdev, unsigned long size, int byte_align, bool kernel, u32 domain, u32 flags, struct sg_table *sg, + struct reservation_object *robj, struct radeon_bo **bo_ptr); extern int radeon_bo_kmap(struct radeon_bo *bo, void **ptr); extern void radeon_bo_kunmap(struct radeon_bo *bo); diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index 171daf7fc483..aff2a5e4ded3 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -61,12 +61,15 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg) { + struct reservation_object *robj = attach->dmabuf->resv; struct radeon_device *rdev = dev->dev_private; struct radeon_bo *bo; int ret;
+ ww_mutex_lock(&robj->lock, NULL); ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false, - RADEON_GEM_DOMAIN_GTT, 0, sg, &bo); + RADEON_GEM_DOMAIN_GTT, 0, sg, robj, &bo); + ww_mutex_unlock(&robj->lock); if (ret) return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index d65607902537..2537cb298522 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -404,7 +404,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index b84f97c8718c..c507896aca45 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -65,7 +65,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, }
r = radeon_bo_create(rdev, size, align, true, - domain, flags, NULL, &sa_manager->bo); + domain, flags, NULL, NULL, &sa_manager->bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 17bc3dced9f1..a6296f7daef6 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -67,7 +67,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) }
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, &vram_obj); + 0, NULL, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -87,7 +87,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) struct radeon_fence *fence = NULL;
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, + gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 62d1f4d730a2..0f2215405883 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -858,7 +858,7 @@ int radeon_ttm_init(struct radeon_device *rdev) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->stollen_vga_memory); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index ba4f38916026..11b662469253 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -141,7 +141,8 @@ int radeon_uvd_init(struct radeon_device *rdev) RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo); + RADEON_GEM_DOMAIN_VRAM, 0, NULL, + NULL, &rdev->uvd.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index c7190aadbd89..9e85757d5599 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -126,7 +126,8 @@ int radeon_vce_init(struct radeon_device *rdev) size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->vce.vcpu_bo); + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, + &rdev->vce.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 671ee566aa51..440c3dfbd09f 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -545,7 +545,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, RADEON_GPU_PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt); + RADEON_GEM_DOMAIN_VRAM, 0, + NULL, NULL, &pt); if (r) return r;
@@ -1132,7 +1133,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
r = radeon_bo_create(rdev, pd_size, align, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, - &vm->page_directory); + NULL, &vm->page_directory); if (r) return r;
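The call-site convention, condensed from the diff above: every internal allocation passes NULL and keeps getting a private, freshly initialized reservation object, while the prime import path shares the exporter's object and must hold it across the create:

	/* ordinary allocation: private resv, behaviour unchanged */
	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &bo);

	/* prime import: share the dma-buf's resv, held across the call */
	ww_mutex_lock(&robj->lock, NULL);
	r = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false,
			     RADEON_GEM_DOMAIN_GTT, 0, sg, robj, &bo);
	ww_mutex_unlock(&robj->lock);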
Add an extra reservation_object argument to nouveau_bo_new; it is used in nouveau_prime.c to share the reservation object of an imported dma-buf.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/nouveau/dispnv04/crtc.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c    | 4 ++--
 drivers/gpu/drm/nouveau/nouveau_bo.h    | 1 +
 drivers/gpu/drm/nouveau/nouveau_chan.c  | 2 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c | 6 +++++-
 drivers/gpu/drm/nouveau/nouveau_gem.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_prime.c | 5 ++++-
 drivers/gpu/drm/nouveau/nv17_fence.c    | 2 +-
 drivers/gpu/drm/nouveau/nv50_display.c  | 6 +++---
 drivers/gpu/drm/nouveau/nv50_fence.c    | 2 +-
 drivers/gpu/drm/nouveau/nv84_fence.c    | 4 ++--
 11 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index b90aa5c1f90a..fca6a1f9c20c 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -1127,7 +1127,7 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num) drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &nv_crtc->cursor.nvbo); + 0, 0x0000, NULL, NULL, &nv_crtc->cursor.nvbo); if (!ret) { ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM); if (!ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index bda32276bcc2..f89b4a7c93fe 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -181,7 +181,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, int nouveau_bo_new(struct drm_device *dev, int size, int align, uint32_t flags, uint32_t tile_mode, uint32_t tile_flags, - struct sg_table *sg, + struct sg_table *sg, struct reservation_object *robj, struct nouveau_bo **pnvbo) { struct nouveau_drm *drm = nouveau_drm(dev); @@ -230,7 +230,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, ret = ttm_bo_init(&drm->ttm.bdev, &nvbo->bo, size, type, &nvbo->placement, align >> PAGE_SHIFT, false, NULL, acc_size, sg, - NULL, nouveau_bo_del_ttm); + robj, nouveau_bo_del_ttm); if (ret) { /* ttm will call nouveau_bo_del_ttm if it fails.. */ return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index ae95b2d43b36..d20c0b5c4e31 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -68,6 +68,7 @@ extern struct ttm_bo_driver nouveau_bo_driver; void nouveau_bo_move_init(struct nouveau_drm *); int nouveau_bo_new(struct drm_device *, int size, int align, u32 flags, u32 tile_mode, u32 tile_flags, struct sg_table *sg, + struct reservation_object *robj, struct nouveau_bo **); int nouveau_bo_pin(struct nouveau_bo *, u32 flags); int nouveau_bo_unpin(struct nouveau_bo *); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 99cd9e4a2aa6..d639750379d6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -106,7 +106,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, if (nouveau_vram_pushbuf) target = TTM_PL_FLAG_VRAM;
- ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL, + ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL, NULL, &chan->push.buffer); if (ret == 0) { ret = nouveau_bo_pin(chan->push.buffer, target); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index decfe6c4ac07..574517a396fd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -195,8 +195,12 @@ nouveau_fence_work(struct fence *fence,
work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) { + /* + * this might not be a nouveau fence any more, + * so force a lazy wait here + */ WARN_ON(nouveau_fence_wait((struct nouveau_fence *)fence, - false, false)); + true, false)); goto err; }
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index b7dbd16904e0..1bc4eb33b60f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -165,7 +165,7 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain, flags |= TTM_PL_FLAG_SYSTEM;
ret = nouveau_bo_new(dev, size, align, flags, tile_mode, - tile_flags, NULL, pnvbo); + tile_flags, NULL, NULL, pnvbo); if (ret) return ret; nvbo = *pnvbo; diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 2215cdba587d..228226ab27fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -61,13 +61,16 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sg) { struct nouveau_bo *nvbo; + struct reservation_object *robj = attach->dmabuf->resv; u32 flags = 0; int ret;
flags = TTM_PL_FLAG_TT;
+ ww_mutex_lock(&robj->lock, NULL); ret = nouveau_bo_new(dev, attach->dmabuf->size, 0, flags, 0, 0, - sg, &nvbo); + sg, robj, &nvbo); + ww_mutex_unlock(&robj->lock); if (ret) return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c index 6f9a1f8e2d0f..40b461c7d5c5 100644 --- a/drivers/gpu/drm/nouveau/nv17_fence.c +++ b/drivers/gpu/drm/nouveau/nv17_fence.c @@ -129,7 +129,7 @@ nv17_fence_create(struct nouveau_drm *drm) spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &priv->bo); + 0, 0x0000, NULL, NULL, &priv->bo); if (!ret) { ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM); if (!ret) { diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 03949eaa629f..393ff08c0174 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1378,7 +1378,7 @@ nv50_crtc_create(struct drm_device *dev, int index) drm_mode_crtc_set_gamma_size(crtc, 256);
ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &head->base.lut.nvbo); + 0, 0x0000, NULL, NULL, &head->base.lut.nvbo); if (!ret) { ret = nouveau_bo_pin(head->base.lut.nvbo, TTM_PL_FLAG_VRAM); if (!ret) { @@ -1401,7 +1401,7 @@ nv50_crtc_create(struct drm_device *dev, int index) goto out;
ret = nouveau_bo_new(dev, 64 * 64 * 4, 0x100, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &head->base.cursor.nvbo); + 0, 0x0000, NULL, NULL, &head->base.cursor.nvbo); if (!ret) { ret = nouveau_bo_pin(head->base.cursor.nvbo, TTM_PL_FLAG_VRAM); if (!ret) { @@ -2458,7 +2458,7 @@ nv50_display_create(struct drm_device *dev)
/* small shared memory area we use for notifiers and semaphores */ ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &disp->sync); + 0, 0x0000, NULL, NULL, &disp->sync); if (!ret) { ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM); if (!ret) { diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c index 08fad3668a1c..22d242b37962 100644 --- a/drivers/gpu/drm/nouveau/nv50_fence.c +++ b/drivers/gpu/drm/nouveau/nv50_fence.c @@ -100,7 +100,7 @@ nv50_fence_create(struct nouveau_drm *drm) spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, &priv->bo); + 0, 0x0000, NULL, NULL, &priv->bo); if (!ret) { ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM); if (!ret) { diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index a2f28082c272..7b372a68aa4e 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -233,7 +233,7 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.uevent = true;
ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0, - TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo); + TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, &priv->bo); if (ret == 0) { ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM); if (ret == 0) { @@ -247,7 +247,7 @@ nv84_fence_create(struct nouveau_drm *drm)
if (ret == 0) ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0, - TTM_PL_FLAG_TT, 0, 0, NULL, + TTM_PL_FLAG_TT, 0, 0, NULL, NULL, &priv->bo_gart); if (ret == 0) { ret = nouveau_bo_pin(priv->bo_gart, TTM_PL_FLAG_TT);
Use the semaphore mechanism to make this happen; this uses signaling from the CPU instead of signaling by the GPU.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/radeon/radeon.h           |  17 ++-
 drivers/gpu/drm/radeon/radeon_cs.c        |  30 ++---
 drivers/gpu/drm/radeon/radeon_fence.c     |  13 ++-
 drivers/gpu/drm/radeon/radeon_semaphore.c | 184 ++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index dddb2b7dd752..cd18fa7f801c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -359,6 +359,11 @@ struct radeon_fence_driver { struct delayed_work lockup_work; };
+struct radeon_fence_cb { + struct fence_cb base; + struct fence *fence; +}; + struct radeon_fence { struct fence base;
@@ -368,6 +373,10 @@ struct radeon_fence { unsigned ring;
wait_queue_t fence_wake; + + atomic_t num_cpu_cbs; + struct radeon_fence_cb *cpu_cbs; + uint32_t *cpu_sema; };
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -574,9 +583,11 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, */ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; - signed waiters; + signed waiters, cpu_waiters, cpu_waiters_max; uint64_t gpu_addr; struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + uint32_t *cpu_sema; + struct radeon_fence_cb *cpu_cbs; };
int radeon_semaphore_create(struct radeon_device *rdev, @@ -587,6 +598,10 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, struct radeon_fence *fence); +int radeon_semaphore_sync_obj(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, + struct reservation_object *resv); + int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, int waiting_ring); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 8ad4e2cfae15..b141f5bd029d 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -250,32 +250,16 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
static int radeon_cs_sync_rings(struct radeon_cs_parser *p) { - int i; - - for (i = 0; i < p->nrelocs; i++) { - struct reservation_object *resv; - struct fence *fence; - struct radeon_fence *rfence; - int r; + int i, ret = 0;
+ for (i = 0; !ret && i < p->nrelocs; i++) { if (!p->relocs[i].robj) continue;
- resv = p->relocs[i].robj->tbo.resv; - fence = reservation_object_get_excl(resv); - if (!fence) - continue; - rfence = to_radeon_fence(fence); - if (!rfence || rfence->rdev != p->rdev) { - r = fence_wait(fence, true); - if (r) - return r; - continue; - } - - radeon_semaphore_sync_to(p->ib.semaphore, rfence); + ret = radeon_semaphore_sync_obj(p->rdev, p->ib.semaphore, + p->relocs[i].robj->tbo.resv); } - return 0; + return ret; }
/* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -442,6 +426,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo */ list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+ /* must be called with all reservation_objects still held */ + radeon_semaphore_free(parser->rdev, &parser->ib.semaphore, + parser->ib.fence); + ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, &parser->ib.fence->base); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 0262fe2580d2..7687a7f8f41b 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -142,6 +142,8 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->ring = ring; fence_init(&(*fence)->base, &radeon_fence_ops, &rdev->fence_queue.lock, rdev->fence_context + ring, seq); + (*fence)->cpu_cbs = NULL; + (*fence)->cpu_sema = NULL; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); @@ -1057,11 +1059,20 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr, return t; }
+static void __radeon_fence_destroy(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + + WARN_ON(fence->cpu_cbs); + kfree(fence->cpu_cbs); + fence_free(f); +} + const struct fence_ops radeon_fence_ops = { .get_driver_name = radeon_fence_get_driver_name, .get_timeline_name = radeon_fence_get_timeline_name, .enable_signaling = radeon_fence_enable_signaling, .signaled = radeon_fence_is_signaled, .wait = radeon_fence_default_wait, - .release = NULL, + .release = __radeon_fence_destroy, }; diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 56d9fd66d8ae..2e71463d11c5 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include "radeon.h" #include "radeon_trace.h" +#include <trace/events/fence.h>
int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) @@ -49,7 +50,11 @@ int radeon_semaphore_create(struct radeon_device *rdev, return r; } (*semaphore)->waiters = 0; + (*semaphore)->cpu_waiters = 0; + (*semaphore)->cpu_waiters_max = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); + (*semaphore)->cpu_sema = NULL; + (*semaphore)->cpu_cbs = NULL;
cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); for (i = 0; i < RADEON_NUM_SYNCS; ++i) @@ -115,6 +120,101 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); }
+int radeon_semaphore_reserve_cpu_waiters(struct radeon_semaphore *semaphore, int add) +{ + int max = 4; + struct radeon_fence_cb *cpu_cbs; + + if (semaphore->cpu_waiters + add <= semaphore->cpu_waiters_max) + return 0; + + if (semaphore->cpu_waiters_max) + max = semaphore->cpu_waiters_max * 2; + + cpu_cbs = krealloc(semaphore->cpu_cbs, max * sizeof(*cpu_cbs), GFP_KERNEL); + if (!cpu_cbs) + return -ENOMEM; + semaphore->cpu_cbs = cpu_cbs; + semaphore->cpu_waiters_max = max; + return 0; +} + +static void radeon_semaphore_add_cpu_cb(struct radeon_semaphore *semaphore, + struct fence *fence) +{ + unsigned i; + struct radeon_fence_cb *empty = NULL; + + for (i = 0; i < semaphore->cpu_waiters; ++i) { + struct fence *other = semaphore->cpu_cbs[i].fence; + + if (!other) + empty = &semaphore->cpu_cbs[i]; + else if (other->context == fence->context) { + semaphore->cpu_cbs[i].fence = fence_later(other, fence); + return; + } + } + + if (!empty) + empty = &semaphore->cpu_cbs[semaphore->cpu_waiters++]; + + empty->fence = fence; + return; +} + +/** + * radeon_semaphore_sync_obj - use the semaphore to sync to a bo + * + * @semaphore: semaphore object to add fence to + * @resv: the reservation_object to sync to + * + * Sync the reservation_object using this semaphore. + * + * radeon_semaphore_free must be called with all reservation_object locks + * still held!!! + */ +int radeon_semaphore_sync_obj(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, + struct reservation_object *resv) +{ + struct fence *fence; + struct radeon_fence *rfence; + struct reservation_object_list *fobj; + int ret, i; + + fobj = reservation_object_get_list(resv); + if (fobj && fobj->shared_count) { + ret = radeon_semaphore_reserve_cpu_waiters(semaphore, fobj->shared_count); + if (ret) + return ret; + for (i = 0; i < fobj->shared_count; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + radeon_semaphore_add_cpu_cb(semaphore, fence); + } + return 0; + } + + fence = reservation_object_get_excl(resv); + if (!fence) + return 0; + + rfence = to_radeon_fence(fence); + if (rfence && rfence->rdev == rdev) { + struct radeon_fence *other = semaphore->sync_to[rfence->ring]; + + semaphore->sync_to[rfence->ring] = + radeon_fence_later(rfence, other); + return 0; + } + ret = radeon_semaphore_reserve_cpu_waiters(semaphore, 1); + if (!ret) + radeon_semaphore_add_cpu_cb(semaphore, fence); + return ret; +} + /** * radeon_semaphore_sync_rings - sync ring to all registered fences * @@ -124,6 +224,8 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, * * Ensure that all registered fences are signaled before letting * the ring continue. The caller must hold the ring lock. + * + * This function may only be called once on a semaphore. */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, @@ -132,6 +234,16 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, unsigned count = 0; int i, r;
+ if (semaphore->cpu_waiters) { + /* allocate enough space for sync command */ + if (radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + semaphore->cpu_sema = radeon_sa_bo_cpu_addr(semaphore->sa_bo); + semaphore->gpu_addr += 8; + ++count; + } else + semaphore->cpu_waiters = -1; + } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { struct radeon_fence *fence = semaphore->sync_to[i];
@@ -188,6 +300,68 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, return 0; }
+static void radeon_semaphore_cpu_trigger(struct fence *other_fence, + struct fence_cb *fence_cb) +{ + struct radeon_fence_cb *cb = (struct radeon_fence_cb*)fence_cb; + struct radeon_fence *fence = (struct radeon_fence *)cb->fence; + +#ifdef CONFIG_FENCE_TRACE + int ret = atomic_dec_return(&fence->num_cpu_cbs); + + if (ret) + FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n", + ret, other_fence->context, other_fence->seqno); + else +#else + if (atomic_dec_and_test(&fence->num_cpu_cbs)) +#endif + { + FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n", + other_fence->context, other_fence->seqno); + + *fence->cpu_sema = ~0; + + kfree(fence->cpu_cbs); + fence->cpu_cbs = NULL; + } +} + +static void radeon_semaphore_arm_cpu_cbs(struct radeon_semaphore *semaphore, + struct radeon_fence *fence) +{ + unsigned i, skipped = 0; + + fence->cpu_cbs = semaphore->cpu_cbs; + fence->cpu_sema = semaphore->cpu_sema; + atomic_set(&fence->num_cpu_cbs, semaphore->cpu_waiters); + + for (i = 0; i < semaphore->cpu_waiters; ++i) { + struct fence *other = fence->cpu_cbs[i].fence; + + if (other) { + fence->cpu_cbs[i].fence = &fence->base; + trace_fence_annotate_wait_on(&fence->base, other); + + FENCE_TRACE(&fence->base, "queued wait on %u#%u\n", + other->context, other->seqno); + + if (!fence_add_callback(other, &fence->cpu_cbs[i].base, + radeon_semaphore_cpu_trigger)) + continue; + } + skipped++; + } + + if (skipped && atomic_sub_and_test(skipped, &fence->num_cpu_cbs)) { + FENCE_TRACE(&fence->base, "No triggers, starting..\n"); + + *fence->cpu_sema = ~0; + kfree(fence->cpu_cbs); + fence->cpu_cbs = NULL; + } +} + void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence) @@ -195,6 +369,16 @@ void radeon_semaphore_free(struct radeon_device *rdev, if (semaphore == NULL || *semaphore == NULL) { return; } + if ((*semaphore)->cpu_cbs) { + (*semaphore)->waiters--; + + if (!fence) { + *(*semaphore)->cpu_sema = ~0U; + kfree((*semaphore)->cpu_cbs); + } else + radeon_semaphore_arm_cpu_cbs(*semaphore, fence); + } + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," " hardware lockup imminent!\n", *semaphore);
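To summarize the mechanism the diff implements: one semaphore slot stays armed for the waiting ring, and the CPU releases it by writing ~0 once the last foreign fence callback has fired. A minimal userspace model of that handshake (illustrative names only, none of this is kernel code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Model: the GPU ring blocks until the semaphore word becomes nonzero;
 * the CPU flips it to ~0 when the last foreign fence has signaled. */
struct model_sema {
	atomic_int pending;	/* outstanding foreign fences */
	uint32_t value;		/* memory word the wait op checks */
};

/* Called from each foreign fence's completion callback. */
static void model_fence_signaled(struct model_sema *s)
{
	if (atomic_fetch_sub(&s->pending, 1) == 1)
		s->value = ~0u;	/* releases the GPU-side semaphore wait */
}

int main(void)
{
	struct model_sema s = { .value = 0 };
	int i;

	atomic_init(&s.pending, 3);
	for (i = 0; i < 3; i++)
		model_fence_signaled(&s);

	printf("sema value: 0x%x\n", (unsigned)s.value); /* 0xffffffff */
	return 0;
}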
On 04.09.2014 at 13:42, Maarten Lankhorst wrote:
Use the semaphore mechanism to make this happen; this uses signaling from the cpu instead of signaling by the gpu.
I'm not sure if this will work reliably when the semaphores are in system memory. We might need to reserve some VRAM for them instead.
Regards, Christian.
Hey,
On 04-09-14 at 13:54, Christian König wrote:
On 04.09.2014 at 13:42, Maarten Lankhorst wrote:
Use the semaphore mechanism to make this happen; this uses signaling from the cpu instead of signaling by the gpu.
I'm not sure if this will work reliably when the semaphores are in system memory. We might need to reserve some VRAM for them instead.
Regards, Christian.
Why would it be unreliable? I mostly kept it in the semaphore for simplicity.
~Maarten
On 04.09.2014 at 14:08, Maarten Lankhorst wrote:
Why would it be unreliable? I mostly kept it in the semaphore for simplicity.
The semaphore block tries to avoid memory accesses whenever possible.
For example, when a signal for address A arrives, the block doesn't necessarily write it to memory but instead tries to match it immediately with a wait for address A. The same is true if a wait for address A arrives and the semaphore block thinks it already knows the memory value at address A.
Also, I'm not sure if the semaphore block really polls the memory address for changes; it might just snoop the MC for writes to that address. Since CPU writes to system memory aren't seen by the GPU MC, the semaphore block would never know something changed.
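In other words, if the block only ever consults its own cached or snooped copy of the word, a plain CPU store to the backing page is simply never observed. A toy model of that failure mode (purely illustrative, not how the hardware is documented to work):

#include <stdbool.h>
#include <stdint.h>

/* The block keeps its own idea of the semaphore word and never
 * re-reads memory, so a CPU store to *mem goes unnoticed. */
struct sema_block {
	uint32_t cached_value;	/* the block's snooped copy */
};

static bool block_wait_ge(const struct sema_block *b, uint32_t seq,
			  const uint32_t *mem)
{
	(void)mem;	/* never re-read: this is the whole problem */
	return b->cached_value >= seq;
}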
I need to check the docs on how to do this correctly.
Christian.
I need to check the docs on how to do this correctly.
The docs don't really cover this case.
For the GPU waiting on an address there is an extra document just for this case, which I don't have at hand right now. But IIRC it was recommended to use the local memory of the device waiting on the semaphore. I'm just not sure whether that's purely for performance reasons, to avoid accessing the bus, or whether there's a hard and unavoidable hardware reason to do so.
For the GPU signaling case there is a special bit in the semaphore instructions that you need to set if any user outside of the GPU should see the write.
In general it is explicitly supported to use semaphores for inter-device synchronization on the bus (that's what the block is made for), but it's not intended to be used for synchronization between the CPU and the device. So I'm not sure if things like cache snooping are implemented and correctly supported.
Well, I see the feature as more of a nice-to-have that needs a bunch of testing, so I would say either hold the patch back for now or make it optional to use, something like that.
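One way to make it optional would be a module parameter, along these lines (a hypothetical sketch; the parameter name and default are assumptions, not part of the posted series, and radeon_semaphore_sync_obj would fall back to fence_wait() when it is disabled):

#include <linux/module.h>

/* Hypothetical: off by default until the semaphore-block behaviour
 * with system memory is confirmed. */
static int radeon_cpu_semaphores;
module_param_named(cpu_semaphores, radeon_cpu_semaphores, int, 0444);
MODULE_PARM_DESC(cpu_semaphores,
		 "Signal semaphores from the CPU for foreign fences (1 = enabled, 0 = disabled (default))");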
Regards, Christian.
Hey,
On 04-09-14 at 15:34, Christian König wrote:
Well, I see the feature as more of a nice-to-have that needs a bunch of testing, so I would say either hold the patch back for now or make it optional to use, something like that.
You're right; it's meant as something 'nice to have'. That's why it came after the patch that exports reservation_objects to/from dma-buf. :-)
~Maarten
This requires allocating a fence sooner to annotate any cross-dev fences, and making sure that enough memory is available before emitting the fence.
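The reordered flow that results is roughly the following (a sketch using the function names from the patch; the wrapper itself is illustrative, compare nouveau_bo_move_m2mf() in the diff below):

static int example_sync_and_emit(struct nouveau_channel *chan,
				 struct nouveau_bo *nvbo)
{
	struct nouveau_fence *fence = NULL;
	int ret;

	ret = nouveau_fence_new(chan, &fence);		/* allocate, do not emit */
	if (ret)
		return ret;

	ret = nouveau_fence_sync(nvbo, fence, true);	/* attach cross-dev waiters */
	if (ret == 0)
		ret = nouveau_fence_emit(fence);	/* only now hit the ring */

	nouveau_fence_unref(&fence);
	return ret;
}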
The current seqno is written to the GART bo on completion, and a list of finished fences is kept to allow arbitrary depth.
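Because the GPU-side wait compares the word in the GART bo against its seqno with >=, the CPU may only publish a new seqno once every earlier fence has had its waiters signaled. A small model of that ordered retire loop (illustrative only, not the patch's code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Fences complete in arbitrary order, but the published seqno only
 * advances over the longest fully-triggered prefix, so a ">= seq"
 * wait can never be unblocked early. */
struct model_fence {
	uint32_t seqno;
	bool triggered;	/* all cross-device waiters have signaled */
};

static uint32_t advance_published_seqno(const struct model_fence *pending,
					size_t n, uint32_t published)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!pending[i].triggered)
			break;			/* stop at the first incomplete fence */
		published = pending[i].seqno;	/* everything before it is done */
	}
	return published;
}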
Signed-off-by: Maarten Lankhorst maarten.lankhorst@canonical.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 28 ++-- drivers/gpu/drm/nouveau/nouveau_chan.c | 6 +- drivers/gpu/drm/nouveau/nouveau_display.c | 45 ++++--- drivers/gpu/drm/nouveau/nouveau_fence.c | 212 ++++++++++++++++++++++++++---- drivers/gpu/drm/nouveau/nouveau_fence.h | 29 ++-- drivers/gpu/drm/nouveau/nouveau_gem.c | 25 ++-- drivers/gpu/drm/nouveau/nv04_fence.c | 9 +- drivers/gpu/drm/nouveau/nv10_fence.c | 9 +- drivers/gpu/drm/nouveau/nv84_fence.c | 31 +++-- drivers/gpu/drm/nouveau/nvc0_fence.c | 4 +- 10 files changed, 305 insertions(+), 93 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index f89b4a7c93fe..24c941927926 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -970,21 +970,21 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, }
mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); - ret = nouveau_fence_sync(nouveau_bo(bo), chan, true); - if (ret == 0) { + ret = nouveau_fence_new(chan, &fence); + if (ret) + goto out; + + ret = nouveau_fence_sync(nouveau_bo(bo), fence, true); + if (ret == 0) ret = drm->ttm.move(chan, bo, &bo->mem, new_mem); - if (ret == 0) { - ret = nouveau_fence_new(chan, false, &fence); - if (ret == 0) { - ret = ttm_bo_move_accel_cleanup(bo, - &fence->base, - evict, - no_wait_gpu, - new_mem); - nouveau_fence_unref(&fence); - } - } - } + if (ret == 0) + ret = nouveau_fence_emit(fence); + if (ret == 0) + ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, + no_wait_gpu, new_mem); + nouveau_fence_unref(&fence); + +out: mutex_unlock(&cli->mutex); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index d639750379d6..1e5c76dfed3a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -46,9 +46,11 @@ nouveau_channel_idle(struct nouveau_channel *chan) struct nouveau_fence *fence = NULL; int ret;
- ret = nouveau_fence_new(chan, false, &fence); + ret = nouveau_fence_new(chan, &fence); if (!ret) { - ret = nouveau_fence_wait(fence, false, false); + ret = nouveau_fence_emit(fence); + if (!ret) + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); }
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index a9ec525c0994..adbf870686aa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -26,6 +26,7 @@
#include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/ttm/ttm_execbuf_util.h>
#include <nvif/class.h>
@@ -36,7 +37,6 @@ #include "nouveau_gem.h" #include "nouveau_connector.h" #include "nv50_display.h" - #include "nouveau_fence.h"
#include <nvif/event.h> @@ -644,7 +644,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, struct nouveau_bo *old_bo, struct nouveau_bo *new_bo, struct nouveau_page_flip_state *s, - struct nouveau_fence **pfence) + struct nouveau_fence *fence) { struct nouveau_fence_chan *fctx = chan->fence; struct nouveau_drm *drm = chan->drm; @@ -657,11 +657,6 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, list_add_tail(&s->head, &fctx->flip); spin_unlock_irqrestore(&dev->event_lock, flags);
- /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false); - if (ret) - goto fail; - /* Emit the pageflip */ ret = RING_SPACE(chan, 2); if (ret) @@ -674,7 +669,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, OUT_RING (chan, 0x00000000); FIRE_RING (chan);
- ret = nouveau_fence_new(chan, false, pfence); + ret = nouveau_fence_emit(fence); if (ret) goto fail;
@@ -700,6 +695,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_cli *cli; struct nouveau_fence *fence; int ret; + struct ttm_validate_buffer resv[2] = { + { .bo = &old_bo->bo }, + { .bo = &new_bo->bo }, + }; + struct ww_acquire_ctx ticket; + LIST_HEAD(res);
chan = drm->channel; if (!chan) @@ -714,28 +715,31 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM); if (ret) goto fail_free; + list_add(&resv[1].head, &res); } + list_add(&resv[0].head, &res);
mutex_lock(&cli->mutex); - ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL); + ret = nouveau_fence_new(chan, &fence); if (ret) goto fail_unpin;
- /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false); - if (ret) { - ttm_bo_unreserve(&new_bo->bo); + ret = ttm_eu_reserve_buffers(&ticket, &res, true); + if (ret) goto fail_unpin; - }
if (new_bo != old_bo) { - ttm_bo_unreserve(&new_bo->bo); - - ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); + /* synchronise rendering channel with the kernel's channel */ + ret = nouveau_fence_sync(new_bo, fence, false); if (ret) - goto fail_unpin; + goto fail_unreserve; }
+ /* Synchronize with the old framebuffer */ + ret = nouveau_fence_sync(old_bo, fence, false); + if (ret) + goto fail_unreserve; + /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) { { }, event, nouveau_crtc(crtc)->index, @@ -772,7 +776,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, nouveau_bo_ref(new_bo, &dispnv04->image[head]); }
- ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); + ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, fence); if (ret) goto fail_unreserve; mutex_unlock(&cli->mutex); @@ -781,7 +785,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, crtc->primary->fb = fb;
nouveau_bo_fence(old_bo, fence, false); - ttm_bo_unreserve(&old_bo->bo); + ttm_eu_backoff_reservation(&ticket, &res); if (old_bo != new_bo) nouveau_bo_unpin(old_bo); nouveau_fence_unref(&fence); @@ -792,6 +796,7 @@ fail_unreserve: ttm_bo_unreserve(&old_bo->bo); fail_unpin: mutex_unlock(&cli->mutex); + nouveau_fence_unref(&fence); if (old_bo != new_bo) nouveau_bo_unpin(new_bo); fail_free: diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 574517a396fd..b1a1f0bfbe5a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -143,6 +143,8 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha struct nouveau_fence_priv *priv = (void*)chan->drm->fence; int ret;
+ spin_lock_init(&fctx->trigger_lock); + INIT_LIST_HEAD(&fctx->triggers); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -218,33 +220,128 @@ err: func(data); }
+static void nouveau_fence_cpu_triggered(struct nouveau_fence *fence) +{ + struct nouveau_channel *chan = fence->channel; + struct nouveau_fence_chan *fctx = chan->fence; + u32 any_seq = false, seq = ~0U; + + /* unblock fence, this function is called with irqs disabled */ + kfree(fence->waiters); + fence->waiters = NULL; + + spin_lock(&fctx->trigger_lock); + + /* + * signal all fences for which waiters == NULL until the + * first entry is found for which this is not true. + * + * This allows the wait >= seq op to work correctly on sysmem. + */ + while (!list_empty(&fctx->triggers)) { + struct nouveau_fence *chk = list_entry(fctx->triggers.next, + struct nouveau_fence, + trigger); + + if (chk->waiters) + break; + + any_seq = true; + seq = chk->base.seqno; + + list_del(&chk->trigger); + fence_put(&chk->base); + } + + if (any_seq) + fctx->signal_sysmem(chan, seq); + + spin_unlock(&fctx->trigger_lock); +} + +static void nouveau_fence_cpu_trigger(struct fence *other_fence, + struct fence_cb *fence_cb) +{ + struct nouveau_fence_cb *cb = (struct nouveau_fence_cb*)fence_cb; + struct nouveau_fence *fence = (struct nouveau_fence *)cb->fence; + +#ifdef CONFIG_FENCE_TRACE + int ret = atomic_dec_return(&fence->readers); + + if (ret) + FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n", + ret, other_fence->context, other_fence->seqno); + else +#else + if (atomic_dec_and_test(&fence->readers)) +#endif + { + FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n", + other_fence->context, other_fence->seqno); + + nouveau_fence_cpu_triggered(fence); + } +} + +static void +nouveau_fence_emit_waiters(struct nouveau_fence *fence, + struct nouveau_fence_chan *fctx) +{ + unsigned i, skipped = 0; + + atomic_set(&fence->readers, fence->num_waiters); + + /* add to triggers */ + fence_get(&fence->base); + spin_lock_irq(&fctx->trigger_lock); + list_add_tail(&fence->trigger, &fctx->triggers); + spin_unlock_irq(&fctx->trigger_lock); + + for (i = 0; i < fence->num_waiters; ++i) { + struct fence *other = fence->waiters[i].fence; + + if (other) { + fence->waiters[i].fence = &fence->base; + trace_fence_annotate_wait_on(&fence->base, other); + + FENCE_TRACE(&fence->base, "queued wait on %u#%u\n", + other->context, other->seqno); + + if (!fence_add_callback(other, &fence->waiters[i].base, + nouveau_fence_cpu_trigger)) + continue; + } + skipped++; + } + + if (skipped && atomic_sub_and_test(skipped, &fence->readers)) { + FENCE_TRACE(&fence->base, "No triggers, starting..\n"); + + nouveau_fence_cpu_triggered(fence); + } +} + int -nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) +nouveau_fence_emit(struct nouveau_fence *fence) { + struct nouveau_channel *chan = fence->channel; struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_fence_priv *priv = (void*)chan->drm->fence; int ret;
- fence->channel = chan; + WARN(fence->head.next, "fence is emitted twice!\n"); fence->timeout = jiffies + (15 * HZ);
- if (priv->uevent) - fence_init(&fence->base, &nouveau_fence_ops_uevent, - &fctx->lock, - priv->context_base + chan->chid, ++fctx->sequence); - else - fence_init(&fence->base, &nouveau_fence_ops_legacy, - &fctx->lock, - priv->context_base + chan->chid, ++fctx->sequence); - trace_fence_emit(&fence->base); - ret = fctx->emit(fence); + ret = fctx->emit(fence, false); if (!ret) { fence_get(&fence->base); spin_lock_irq(&fctx->lock); nouveau_fence_update(chan, fctx); list_add_tail(&fence->head, &fctx->pending); spin_unlock_irq(&fctx->lock); + + if (fence->num_waiters) + nouveau_fence_emit_waiters(fence, fctx); }
return ret; @@ -345,9 +442,58 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) return 0; }
+static int nouveau_fence_reserve_waiter(struct nouveau_fence *fence) +{ + int max = 8; + struct nouveau_fence_cb *waiters; + + if (fence->num_waiters + 1 <= fence->max_waiters) + return 0; + + if (fence->max_waiters) + max = fence->max_waiters * 2; + + waiters = krealloc(fence->waiters, max * sizeof(*waiters), GFP_KERNEL); + if (!waiters) + return -ENOMEM; + fence->waiters = waiters; + fence->max_waiters = max; + return 0; +} + +static int nouveau_fence_add_fence_list(struct nouveau_fence *fence, + struct fence *victim) +{ + struct nouveau_fence_cb *empty = NULL; + unsigned i; + int ret; + + for (i = 0; i < fence->num_waiters; ++i) { + struct fence *other = fence->waiters[i].fence; + + if (!other) + empty = &fence->waiters[i]; + else if (other->context == victim->context) { + fence->waiters[i].fence = fence_later(other, victim); + return 0; + } + } + + if (!empty) { + ret = nouveau_fence_reserve_waiter(fence); + if (ret) + return ret; + empty = &fence->waiters[fence->num_waiters++]; + } + + empty->fence = victim; + return 0; +} + int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_fence *nvfence, bool exclusive) { + struct nouveau_channel *chan = nvfence->channel; struct nouveau_fence_chan *fctx = chan->fence; struct fence *fence; struct reservation_object *resv = nvbo->bo.resv; @@ -371,6 +517,8 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e f = nouveau_local_fence(fence, chan->drm); if (f) prev = f->channel; + else if (fctx->signal_sysmem) + return nouveau_fence_add_fence_list(nvfence, fence);
if (!prev || (prev != chan && (ret = fctx->sync(f, prev, chan)))) ret = fence_wait(fence, true); @@ -390,6 +538,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e f = nouveau_local_fence(fence, chan->drm); if (f) prev = f->channel; + else if (fctx->signal_sysmem) { + ret = nouveau_fence_add_fence_list(nvfence, fence); + if (ret) + break; + }
if (!prev || (ret = fctx->sync(f, prev, chan))) ret = fence_wait(fence, true); @@ -404,15 +557,22 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e void nouveau_fence_unref(struct nouveau_fence **pfence) { - if (*pfence) - fence_put(&(*pfence)->base); + struct nouveau_fence *fence = *pfence; + + if (!fence) + return; + *pfence = NULL; + fence_put(&fence->base); }
int -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, +nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence) { + struct nouveau_fifo_chan *fifo = (void*)chan->object; + struct nouveau_fence_priv *priv = (void*)chan->drm->fence; + struct nouveau_fence_chan *fctx = chan->fence; struct nouveau_fence *fence; int ret = 0;
@@ -423,11 +583,11 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, if (!fence) return -ENOMEM;
- fence->sysmem = sysmem; + fence->channel = chan;
- ret = nouveau_fence_emit(fence, chan); - if (ret) - nouveau_fence_unref(&fence); + fence_init(&fence->base, priv->uevent ? &nouveau_fence_ops_uevent : + &nouveau_fence_ops_legacy, &fctx->lock, + priv->context_base + fifo->chid, ++fctx->sequence);
*pfence = fence; return ret; @@ -486,13 +646,21 @@ static bool nouveau_fence_no_signaling(struct fence *f) return true; }
+static void nouveau_fence_release(struct fence *f) +{ + struct nouveau_fence *fence = from_fence(f); + + kfree(fence->waiters); + fence_free(&fence->base); +} + static const struct fence_ops nouveau_fence_ops_legacy = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_no_signaling, .signaled = nouveau_fence_is_signaled, .wait = nouveau_fence_wait_legacy, - .release = NULL + .release = nouveau_fence_release };
static bool nouveau_fence_enable_signaling(struct fence *f) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 986c8135e564..f2a56c940a2c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -12,33 +12,41 @@ struct nouveau_fence {
struct list_head head;
- bool sysmem; - struct nouveau_channel *channel; unsigned long timeout; + + atomic_t readers; + struct list_head trigger; + struct nouveau_fence_cb { + struct fence_cb base; + struct fence *fence; + } *waiters; + int num_waiters, max_waiters; };
-int nouveau_fence_new(struct nouveau_channel *, bool sysmem, +int nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); void nouveau_fence_unref(struct nouveau_fence **);
-int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); +int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); -int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive); +int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_fence *fence, bool exclusive);
struct nouveau_fence_chan { spinlock_t lock; struct list_head pending; struct list_head flip;
- int (*emit)(struct nouveau_fence *); + spinlock_t trigger_lock; + struct list_head triggers; + + int (*emit)(struct nouveau_fence *, bool); int (*sync)(struct nouveau_fence *, struct nouveau_channel *, struct nouveau_channel *); u32 (*read)(struct nouveau_channel *); - int (*emit32)(struct nouveau_channel *, u64, u32); - int (*sync32)(struct nouveau_channel *, u64, u32); + void (*signal_sysmem)(struct nouveau_channel *, u32 seq);
u32 sequence; u32 context; @@ -67,7 +75,7 @@ void nouveau_fence_context_del(struct nouveau_fence_chan *); int nv04_fence_create(struct nouveau_drm *); int nv04_fence_mthd(struct nouveau_channel *, u32, u32, u32);
-int nv10_fence_emit(struct nouveau_fence *); +int nv10_fence_emit(struct nouveau_fence *, bool sysmem); int nv17_fence_sync(struct nouveau_fence *, struct nouveau_channel *, struct nouveau_channel *); u32 nv10_fence_read(struct nouveau_channel *); @@ -86,6 +94,9 @@ int nouveau_flip_complete(void *chan);
struct nv84_fence_chan { struct nouveau_fence_chan base; + int (*emit32)(struct nouveau_channel *, u64, u32); + int (*sync32)(struct nouveau_channel *, u64, u32); + struct nouveau_vma vma; struct nouveau_vma vma_gart; struct nouveau_vma dispc_vma[4]; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 1bc4eb33b60f..e6f11a60c453 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -433,7 +433,7 @@ retry: static int validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, struct list_head *list, struct drm_nouveau_gem_pushbuf_bo *pbbo, - uint64_t user_pbbo_ptr) + uint64_t user_pbbo_ptr, struct nouveau_fence *fence) { struct nouveau_drm *drm = chan->drm; struct drm_nouveau_gem_pushbuf_bo __user *upbbo = @@ -459,7 +459,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, return ret; }
- ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains); + ret = nouveau_fence_sync(nvbo, fence, !!b->write_domains); if (unlikely(ret)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "fail post-validate sync\n"); @@ -496,7 +496,8 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, struct drm_file *file_priv, struct drm_nouveau_gem_pushbuf_bo *pbbo, uint64_t user_buffers, int nr_buffers, - struct validate_op *op, int *apply_relocs) + struct validate_op *op, int *apply_relocs, + struct nouveau_fence *fence) { struct nouveau_cli *cli = nouveau_cli(file_priv); int ret; @@ -513,7 +514,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, return ret; }
- ret = validate_list(chan, cli, &op->list, pbbo, user_buffers); + ret = validate_list(chan, cli, &op->list, pbbo, user_buffers, fence); if (unlikely(ret < 0)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "validating bo list\n"); @@ -707,9 +708,14 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } }
+ ret = nouveau_fence_new(chan, &fence); + if (ret) + goto out_prevalid; + /* Validate buffer list */ ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, - req->nr_buffers, &op, &do_reloc); + req->nr_buffers, &op, &do_reloc, + fence); if (ret) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "validate: %d\n", ret); @@ -793,18 +799,21 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } }
- ret = nouveau_fence_new(chan, false, &fence); + ret = nouveau_fence_emit(fence); if (ret) { NV_PRINTK(error, cli, "error fencing pushbuf: %d\n", ret); WIND_RING(chan); goto out; }
-out: validate_fini(&op, fence, bo); - nouveau_fence_unref(&fence); + +out: + if (ret) + validate_fini(&op, NULL, bo);
out_prevalid: + nouveau_fence_unref(&fence); u_free(bo); u_free(push);
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index 4484131d826a..de4d69166a37 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -35,10 +35,15 @@ struct nv04_fence_priv { };
static int -nv04_fence_emit(struct nouveau_fence *fence) +nv04_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; - int ret = RING_SPACE(chan, 2); + int ret; + + if (sysmem) + return -ENODEV; + + ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NV04(chan, NvSubSw, 0x0150, 1); OUT_RING (chan, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c index 737d066ffc60..1608b0acfe0b 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.c +++ b/drivers/gpu/drm/nouveau/nv10_fence.c @@ -27,10 +27,15 @@ #include "nv10_fence.h"
int -nv10_fence_emit(struct nouveau_fence *fence) +nv10_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; - int ret = RING_SPACE(chan, 2); + int ret; + + if (sysmem) + return -ENODEV; + + ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1); OUT_RING (chan, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 7b372a68aa4e..84fc0c3c5c9a 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -71,18 +71,18 @@ nv84_fence_sync32(struct nouveau_channel *chan, u64 virtual, u32 sequence) }
static int -nv84_fence_emit(struct nouveau_fence *fence) +nv84_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; struct nv84_fence_chan *fctx = chan->fence; u64 addr = chan->chid * 16;
- if (fence->sysmem) + if (sysmem) addr += fctx->vma_gart.offset; else addr += fctx->vma.offset;
- return fctx->base.emit32(chan, addr, fence->base.seqno); + return fctx->emit32(chan, addr, fence->base.seqno); }
static int @@ -92,12 +92,9 @@ nv84_fence_sync(struct nouveau_fence *fence, struct nv84_fence_chan *fctx = chan->fence; u64 addr = prev->chid * 16;
- if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + addr += fctx->vma.offset;
- return fctx->base.sync32(chan, addr, fence->base.seqno); + return fctx->sync32(chan, addr, fence->base.seqno); }
static u32 @@ -108,6 +105,15 @@ nv84_fence_read(struct nouveau_channel *chan) }
static void +nv84_fence_signal_sysmem(struct nouveau_channel *chan, u32 seq) +{ + struct nouveau_fifo_chan *fifo = (void *)chan->object; + struct nv84_fence_priv *priv = chan->drm->fence; + + return nouveau_bo_wr32(priv->bo_gart, fifo->chid * 16/4, seq); +} + +static void nv84_fence_context_del(struct nouveau_channel *chan) { struct drm_device *dev = chan->drm->dev; @@ -140,12 +146,15 @@ nv84_fence_context_new(struct nouveau_channel *chan) return -ENOMEM;
nouveau_fence_context_new(chan, &fctx->base); + fctx->base.emit = nv84_fence_emit; fctx->base.sync = nv84_fence_sync; fctx->base.read = nv84_fence_read; - fctx->base.emit32 = nv84_fence_emit32; - fctx->base.sync32 = nv84_fence_sync32; + fctx->base.signal_sysmem = nv84_fence_signal_sysmem; fctx->base.sequence = nv84_fence_read(chan); + nouveau_bo_wr32(priv->bo_gart, chan->chid * 16/4, fctx->base.sequence); + fctx->emit32 = nv84_fence_emit32; + fctx->sync32 = nv84_fence_sync32;
ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma); if (ret == 0) { @@ -159,8 +168,6 @@ nv84_fence_context_new(struct nouveau_channel *chan) ret = nouveau_bo_vma_add(bo, cli->vm, &fctx->dispc_vma[i]); }
- nouveau_bo_wr32(priv->bo, chan->chid * 16/4, 0x00000000); - if (ret) nv84_fence_context_del(chan); return ret; diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c index becf19abda2d..612689a5e35a 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fence.c +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c @@ -66,8 +66,8 @@ nvc0_fence_context_new(struct nouveau_channel *chan) int ret = nv84_fence_context_new(chan); if (ret == 0) { struct nv84_fence_chan *fctx = chan->fence; - fctx->base.emit32 = nvc0_fence_emit32; - fctx->base.sync32 = nvc0_fence_sync32; + fctx->emit32 = nvc0_fence_emit32; + fctx->sync32 = nvc0_fence_sync32; } return ret; }