v2 drops the extra syncobj parameter and gets rid of the submit flags since 0 is never a valid syncobj handle.
This series allows userspace to submit syncobj handles for importing a fence to wait on or exporting the job fence as part of submission.
The primary use of this is to enable native fence fd support in Mesa. Userspace implementation is under review and available here:
https://patchwork.freedesktop.org/series/42081/
Stefan Schake (3): drm/vc4: Syncobj import support drm/vc4: Export fence through syncobj drm/vc4: Enable syncobj support
drivers/gpu/drm/vc4/vc4_drv.c | 3 ++- drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 60 ++++++++++++++++++++++++++++++++++++++----- include/uapi/drm/vc4_drm.h | 13 +++++++--- 4 files changed, 65 insertions(+), 12 deletions(-)
Allow userland to specify a syncobj that is waited on before a render job starts processing.
v2: Use 0 as invalid syncobj to drop flag (Eric) Drop extra newline (Eric)
Signed-off-by: Stefan Schake stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 30 +++++++++++++++++++++++++----- include/uapi/drm/vc4_drm.h | 7 +++---- 3 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 22589d3..554a4e8 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -11,6 +11,7 @@ #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_atomic.h> +#include <drm/drm_syncobj.h>
#include "uapi/drm/vc4_drm.h"
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0d..e305ccde 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h>
#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -1115,6 +1116,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0;
if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1125,11 +1127,6 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; }
- if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); - return -EINVAL; - } - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1164,6 +1161,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } }
+ if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. + */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index b95a0e1..d97065b 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -183,11 +183,10 @@ struct drm_vc4_submit_cl { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmonid;
- /* Unused field to align this struct on 64 bits. Must be set to 0. - * If one ever needs to add an u32 field to this struct, this field - * can be used. + /* Syncobj handle to wait on. If set, processing of this render job + * will not start until the syncobj is signaled. 0 means ignore. */ - __u32 pad2; + __u32 in_sync; };
/**
Allow specifying a syncobj on render job submission where we store the fence for the job. This gives userland flexible access to the fence.
v2: Use 0 as invalid syncobj to drop flag (Eric) Don't reintroduce the padding (Eric)
Signed-off-by: Stefan Schake stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_gem.c | 30 ++++++++++++++++++++++++++++-- include/uapi/drm/vc4_drm.h | 6 ++++++ 2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e305ccde..a4c4be3 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -656,7 +656,8 @@ vc4_lock_bo_reservations(struct drm_device *dev, */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -679,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base;
+ if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno);
vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1114,6 +1118,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; struct dma_fence *in_fence; @@ -1201,12 +1206,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail;
+ if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL;
- ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail;
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index d97065b..2be4fe3 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -187,6 +187,12 @@ struct drm_vc4_submit_cl { * will not start until the syncobj is signaled. 0 means ignore. */ __u32 in_sync; + + /* Syncobj handle to export fence to. If set, the fence in the syncobj + * will be replaced with a fence that signals upon completion of this + * render job. 0 means ignore. + */ + __u32 out_sync; };
/**
This doesn't require any additional functionality from the driver but is a prerequisite to userland calling the syncobj ioctls.
Signed-off-by: Stefan Schake stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 40ddeaa..d9b8b70 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -175,7 +175,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_RENDER | - DRIVER_PRIME), + DRIVER_PRIME | + DRIVER_SYNCOBJ), .lastclose = drm_fb_helper_lastclose, .open = vc4_open, .postclose = vc4_close,
Stefan Schake stschake@gmail.com writes:
v2 drops the extra syncobj parameter and gets rid of the submit flags since 0 is never a valid syncobj handle.
This series allows userspace to submit syncobj handles for importing a fence to wait on or exporting the job fence as part of submission.
The primary use of this is to enable native fence fd support in Mesa. Userspace implementation is under review and available here:
This series was a joy to read. I've reviewed and pushed it to drm-misc-next, and we can land the Mesa side once it's made its way to drm-next.
dri-devel@lists.freedesktop.org