On Wed, Apr 12, 2017 at 12:12:02PM -0700, Eric Anholt wrote:
This is needed for proper synchronization with display on another DRM device (pl111 or tinydrm) with buffers produced by vc4 V3D. Fixes the new igt vc4_dmabuf_poll testcase, and rendering of one of the glmark2 desktop tests on pl111+vc4.
This doesn't yet introduce waits on other device's fences before vc4's rendering/display, because I don't have testcases for them.
v2: Reuse dma_fence_free(), retitle commit message to clarify that it's not a full dma-buf fencing implementation yet.
Signed-off-by: Eric Anholt eric@anholt.net
Double-checked a few things in your ww_mutex scheme, seems are correct. And testing with CONFIG_DEBUG_WW_MUTEX_SLOWPATH should catch any kind of fumbles in your error paths. I didnt do a full review, so just
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch
drivers/gpu/drm/vc4/Makefile | 1 + drivers/gpu/drm/vc4/vc4_bo.c | 37 ++++++++++- drivers/gpu/drm/vc4/vc4_drv.c | 3 +- drivers/gpu/drm/vc4/vc4_drv.h | 30 +++++++++ drivers/gpu/drm/vc4/vc4_fence.c | 56 +++++++++++++++++ drivers/gpu/drm/vc4/vc4_gem.c | 136 +++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/vc4/vc4_irq.c | 4 ++ 7 files changed, 262 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/vc4/vc4_fence.c
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 61f45d122bd0..ab687fba4916 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -9,6 +9,7 @@ vc4-y := \ vc4_drv.o \ vc4_dpi.o \ vc4_dsi.o \
- vc4_fence.o \ vc4_kms.o \ vc4_gem.o \ vc4_hdmi.o \
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index af29432a6471..80b2f9e55c5c 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -19,6 +19,8 @@
- rendering can return quickly.
*/
+#include <linux/dma-buf.h>
#include "vc4_drv.h" #include "uapi/drm/vc4_drm.h"
@@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
vc4->bo_stats.num_allocated--; vc4->bo_stats.size_allocated -= obj->size;
- if (bo->resv == &bo->_resv)
reservation_object_fini(bo->resv);
- drm_gem_cma_free_object(obj);
}
@@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-ENOMEM); } }
- bo = to_vc4_bo(&cma_obj->base);
- return to_vc4_bo(&cma_obj->base);
- bo->resv = &bo->_resv;
- reservation_object_init(bo->resv);
- return bo;
}
int vc4_dumb_create(struct drm_file *file_priv, @@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data) schedule_work(&vc4->bo_cache.time_work); }
+struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) +{
- struct vc4_bo *bo = to_vc4_bo(obj);
- return bo->resv;
+}
struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { @@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj) return drm_gem_cma_prime_vmap(obj); }
+struct drm_gem_object * +vc4_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sgt)
+{
- struct drm_gem_object *obj;
- struct vc4_bo *bo;
- obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
- if (IS_ERR(obj))
return obj;
- bo = to_vc4_bo(obj);
- bo->resv = attach->dmabuf->resv;
- return obj;
+}
int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 61e674baf3a6..92fb9a41fe7c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, .gem_prime_export = vc4_prime_export,
- .gem_prime_res_obj = vc4_prime_res_obj, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
- .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
- .gem_prime_import_sg_table = vc4_prime_import_sg_table, .gem_prime_vmap = vc4_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = vc4_prime_mmap,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index dea304966e65..08d5c2213c80 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -8,7 +8,9 @@
#include "drmP.h" #include "drm_gem_cma_helper.h" +#include "drm_gem_cma_helper.h"
+#include <linux/reservation.h> #include <drm/drm_encoder.h>
struct vc4_dev { @@ -56,6 +58,8 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. */ struct mutex bo_lock;
- uint64_t dma_fence_context;
- /* Sequence number for the last job queued in bin_job_list.
*/
- Starts at 0 (no jobs emitted).
@@ -150,6 +154,10 @@ struct vc4_bo { * DRM_IOCTL_VC4_CREATE_SHADER_BO. */ struct vc4_validated_shader_info *validated_shader;
- /* normally (resv == &_resv) except for imported bo's */
- struct reservation_object *resv;
- struct reservation_object _resv;
};
static inline struct vc4_bo * @@ -158,6 +166,19 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; }
+struct vc4_fence {
- struct dma_fence base;
- struct drm_device *dev;
- /* vc4 seqno for signaled() test */
- uint64_t seqno;
+};
+static inline struct vc4_fence * +to_vc4_fence(struct dma_fence *fence) +{
- return (struct vc4_fence *)fence;
+}
struct vc4_seqno_cb { struct work_struct work; uint64_t seqno; @@ -231,6 +252,8 @@ struct vc4_exec_info { /* Latest write_seqno of any BO that binning depends on. */ uint64_t bin_dep_seqno;
- struct dma_fence *fence;
- /* Last current addresses the hardware was processing when the
*/
- hangcheck timer checked on us.
@@ -437,7 +460,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sgt);
void *vc4_prime_vmap(struct drm_gem_object *obj); void vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); @@ -469,6 +496,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); extern struct platform_driver vc4_dsi_driver; int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
+/* vc4_fence.c */ +extern const struct dma_fence_ops vc4_fence_ops;
/* vc4_gem.c */ void vc4_gem_init(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev); diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c new file mode 100644 index 000000000000..dbf5a5a5d5f5 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_fence.c @@ -0,0 +1,56 @@ +/*
- Copyright © 2017 Broadcom
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the "Software"),
- to deal in the Software without restriction, including without limitation
- the rights to use, copy, modify, merge, publish, distribute, sublicense,
- and/or sell copies of the Software, and to permit persons to whom the
- Software is furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice (including the next
- paragraph) shall be included in all copies or substantial portions of the
- Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- IN THE SOFTWARE.
- */
+#include "vc4_drv.h"
+static const char *vc4_fence_get_driver_name(struct dma_fence *fence) +{
- return "vc4";
+}
+static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) +{
- return "vc4-v3d";
+}
+static bool vc4_fence_enable_signaling(struct dma_fence *fence) +{
- return true;
+}
+static bool vc4_fence_signaled(struct dma_fence *fence) +{
- struct vc4_fence *f = to_vc4_fence(fence);
- struct vc4_dev *vc4 = to_vc4_dev(f->dev);
- return vc4->finished_seqno >= f->seqno;
+}
+const struct dma_fence_ops vc4_fence_ops = {
- .get_driver_name = vc4_fence_get_driver_name,
- .get_timeline_name = vc4_fence_get_timeline_name,
- .enable_signaling = vc4_fence_enable_signaling,
- .signaled = vc4_fence_signaled,
- .wait = dma_fence_default_wait,
- .release = dma_fence_free,
+}; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e9c381c42139..a1a01044263c 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->bo_count; i++) { bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno;
reservation_object_add_shared_fence(bo->resv, exec->fence);
}
list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->rcl_write_bo_count; i++) { bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno;
reservation_object_add_excl_fence(bo->resv, exec->fence);
- }
+}
+static void +vc4_unlock_bo_reservations(struct drm_device *dev,
struct vc4_exec_info *exec,
struct ww_acquire_ctx *acquire_ctx)
+{
- int i;
- for (i = 0; i < exec->bo_count; i++) {
struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
}ww_mutex_unlock(&bo->resv->lock);
- ww_acquire_fini(acquire_ctx);
+}
+/* Takes the reservation lock on all the BOs being referenced, so that
- at queue submit time we can update the reservations.
- We don't lock the RCL the tile alloc/state BOs, or overflow memory
- (all of which are on exec->unref_list). They're entirely private
- to vc4, so we don't attach dma-buf fences to them.
- */
+static int +vc4_lock_bo_reservations(struct drm_device *dev,
struct vc4_exec_info *exec,
struct ww_acquire_ctx *acquire_ctx)
+{
- int contended_lock = -1;
- int i, ret;
- struct vc4_bo *bo;
- ww_acquire_init(acquire_ctx, &reservation_ww_class);
+retry:
- if (contended_lock != -1) {
bo = to_vc4_bo(&exec->bo[contended_lock]->base);
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
acquire_ctx);
if (ret) {
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- for (i = 0; i < exec->bo_count; i++) {
if (i == contended_lock)
continue;
bo = to_vc4_bo(&exec->bo[i]->base);
ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
if (ret) {
int j;
for (j = 0; j < i; j++) {
bo = to_vc4_bo(&exec->bo[j]->base);
ww_mutex_unlock(&bo->resv->lock);
}
if (contended_lock != -1 && contended_lock >= i) {
bo = to_vc4_bo(&exec->bo[contended_lock]->base);
ww_mutex_unlock(&bo->resv->lock);
}
if (ret == -EDEADLK) {
contended_lock = i;
goto retry;
}
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- ww_acquire_done(acquire_ctx);
- /* Reserve space for our shared (read-only) fence references,
* before we commit the CL to the hardware.
*/
- for (i = 0; i < exec->bo_count; i++) {
bo = to_vc4_bo(&exec->bo[i]->base);
ret = reservation_object_reserve_shared(bo->resv);
if (ret) {
vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
return ret;
}
- }
- return 0;
}
/* Queues a struct vc4_exec_info for execution. If no job is @@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
- then bump the end address. That's a change for a later date,
- though.
*/ -static void -vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) +static int +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
struct ww_acquire_ctx *acquire_ctx)
{ struct vc4_dev *vc4 = to_vc4_dev(dev); uint64_t seqno; unsigned long irqflags;
struct vc4_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (!fence)
return -ENOMEM;
fence->dev = dev;
spin_lock_irqsave(&vc4->job_lock, irqflags);
seqno = ++vc4->emit_seqno; exec->seqno = seqno;
dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
vc4->dma_fence_context, exec->seqno);
fence->seqno = exec->seqno;
exec->fence = &fence->base;
vc4_update_bo_seqnos(exec, seqno);
vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
list_add_tail(&exec->head, &vc4->bin_job_list);
/* If no job was executing, kick ours off. Otherwise, it'll
@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) }
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- return 0;
}
/** @@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned i;
- /* If we got force-completed because of GPU reset rather than
* through our IRQ handler, signal the fence now.
*/
- if (exec->fence)
dma_fence_signal(exec->fence);
- if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
@@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec;
struct ww_acquire_ctx acquire_ctx; int ret = 0;
if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
@@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail;
- ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
- if (ret)
goto fail;
- /* Clear this out of the struct we'll be putting in the queue,
*/ exec->args = NULL;
- since it's part of our stack.
- vc4_queue_submit(dev, exec);
ret = vc4_queue_submit(dev, exec, &acquire_ctx);
if (ret)
goto fail;
/* Return the seqno for our job. */ args->seqno = vc4->emit_seqno;
@@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev);
- vc4->dma_fence_context = dma_fence_context_alloc(1);
- INIT_LIST_HEAD(&vc4->bin_job_list); INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list);
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index cdc6e6760705..1384af9fc987 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -142,6 +142,10 @@ vc4_irq_finish_render_job(struct drm_device *dev)
vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list);
if (exec->fence) {
dma_fence_signal_locked(exec->fence);
exec->fence = NULL;
} vc4_submit_next_render_job(dev);
wake_up_all(&vc4->job_wait_queue);
-- 2.11.0
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel