On 30/09/2021 20:09, Boris Brezillon wrote:
This should help limit the number of ioctls when submitting multiple jobs. The new ioctl also supports syncobj timelines and BO access flags.
v5:
- Fix typos
- Add BUILD_BUG_ON() checks to make sure SUBMIT_BATCH_VERSION and descriptor sizes are synced
- Simplify error handling in panfrost_ioctl_batch_submit()
- Don't disable implicit fences on exclusive references
v4:
- Implement panfrost_ioctl_submit() as a wrapper around panfrost_submit_job()
- Replace stride fields by a version field which is mapped to a <job_stride,bo_ref_stride,syncobj_ref_stride> tuple internally
v3:
- Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the old submit path
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Reviewed-by: Steven Price steven.price@arm.com
drivers/gpu/drm/panfrost/panfrost_drv.c | 584 ++++++++++++++++-------- drivers/gpu/drm/panfrost/panfrost_job.c | 4 +- include/uapi/drm/panfrost_drm.h | 92 ++++ 3 files changed, 492 insertions(+), 188 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index f8f430f68090..30dc158d56e6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -147,193 +147,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) return 0; }
-/**
- panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
- referenced by the job.
- @dev: DRM device
- @file_priv: DRM file for this fd
- @args: IOCTL args
- @job: job being set up
- Resolve handles from userspace to BOs and attach them to job.
- Note that this function doesn't need to unreference the BOs on
- failure, because that will happen at panfrost_job_cleanup() time.
- */
-static int -panfrost_lookup_bos(struct drm_device *dev,
struct drm_file *file_priv,
struct drm_panfrost_submit *args,
struct panfrost_job *job)
-{
- unsigned int i;
- int ret;
- job->bo_count = args->bo_handle_count;
- if (!job->bo_count)
return 0;
- job->bo_flags = kvmalloc_array(job->bo_count,
sizeof(*job->bo_flags),
GFP_KERNEL | __GFP_ZERO);
- if (!job->bo_flags)
return -ENOMEM;
- for (i = 0; i < job->bo_count; i++)
job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE;
- ret = drm_gem_objects_lookup(file_priv,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count, &job->bos);
- if (ret)
return ret;
- return panfrost_get_job_mappings(file_priv, job);
-}
-/**
- panfrost_copy_in_sync() - Sets up job->deps with the sync objects
- referenced by the job.
- @dev: DRM device
- @file_priv: DRM file for this fd
- @args: IOCTL args
- @job: job being set up
- Resolve syncobjs from userspace to fences and attach them to job.
- Note that this function doesn't need to unreference the fences on
- failure, because that will happen at panfrost_job_cleanup() time.
- */
-static int -panfrost_copy_in_sync(struct drm_device *dev,
struct drm_file *file_priv,
struct drm_panfrost_submit *args,
struct panfrost_job *job)
-{
- u32 *handles;
- int ret = 0;
- int i, in_fence_count;
- in_fence_count = args->in_sync_count;
- if (!in_fence_count)
return 0;
- handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL);
- if (!handles) {
ret = -ENOMEM;
DRM_DEBUG("Failed to allocate incoming syncobj handles\n");
goto fail;
- }
- if (copy_from_user(handles,
(void __user *)(uintptr_t)args->in_syncs,
in_fence_count * sizeof(u32))) {
ret = -EFAULT;
DRM_DEBUG("Failed to copy in syncobj handles\n");
goto fail;
- }
- for (i = 0; i < in_fence_count; i++) {
struct dma_fence *fence;
ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0,
&fence);
if (ret)
goto fail;
ret = drm_sched_job_add_dependency(&job->base, fence);
if (ret)
goto fail;
- }
-fail:
- kvfree(handles);
- return ret;
-}
-static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
struct drm_file *file)
-{
- struct panfrost_device *pfdev = dev->dev_private;
- struct drm_panfrost_submit *args = data;
- struct drm_syncobj *sync_out = NULL;
- struct panfrost_submitqueue *queue;
- struct panfrost_job *job;
- int ret = 0, slot;
- if (!args->jc)
return -EINVAL;
- if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
return -EINVAL;
- queue = panfrost_submitqueue_get(file->driver_priv, 0);
- if (IS_ERR(queue))
return PTR_ERR(queue);
- if (args->out_sync > 0) {
sync_out = drm_syncobj_find(file, args->out_sync);
if (!sync_out) {
ret = -ENODEV;
goto fail_put_queue;
}
- }
- job = kzalloc(sizeof(*job), GFP_KERNEL);
- if (!job) {
ret = -ENOMEM;
goto out_put_syncout;
- }
- kref_init(&job->refcount);
- job->pfdev = pfdev;
- job->jc = args->jc;
- job->requirements = args->requirements;
- job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
- job->file_priv = file->driver_priv;
- slot = panfrost_job_get_slot(job);
- ret = drm_sched_job_init(&job->base,
&queue->sched_entity[slot],
NULL);
- if (ret)
goto out_put_job;
- ret = panfrost_copy_in_sync(dev, file, args, job);
- if (ret)
goto out_cleanup_job;
- ret = panfrost_lookup_bos(dev, file, args, job);
- if (ret)
goto out_cleanup_job;
- ret = panfrost_job_push(queue, job);
- if (ret)
goto out_cleanup_job;
- /* Update the return sync object for the job */
- if (sync_out)
drm_syncobj_replace_fence(sync_out, job->render_done_fence);
-out_cleanup_job:
- if (ret)
drm_sched_job_cleanup(&job->base);
-out_put_job:
- panfrost_job_put(job);
-out_put_syncout:
- if (sync_out)
drm_syncobj_put(sync_out);
-fail_put_queue:
- panfrost_submitqueue_put(queue);
- return ret;
-}
static int panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -509,6 +322,402 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, return panfrost_submitqueue_destroy(priv, id); }
+#define PANFROST_BO_REF_ALLOWED_FLAGS \
- (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP)
+static int +panfrost_get_job_bos(struct drm_file *file_priv,
u64 refs, u32 ref_stride, u32 count,
struct panfrost_job *job)
+{
- void __user *in = u64_to_user_ptr(refs);
- unsigned int i;
- job->bo_count = count;
- if (!count)
return 0;
- job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos),
GFP_KERNEL | __GFP_ZERO);
- job->bo_flags = kvmalloc_array(job->bo_count,
sizeof(*job->bo_flags),
GFP_KERNEL | __GFP_ZERO);
- if (!job->bos || !job->bo_flags)
return -ENOMEM;
- for (i = 0; i < count; i++) {
struct drm_panfrost_bo_ref ref = { };
int ret;
ret = copy_struct_from_user(&ref, sizeof(ref),
in + (i * ref_stride),
ref_stride);
if (ret)
return ret;
/* Prior to the BATCH_SUBMIT ioctl all accessed BOs were
* treated as exclusive.
*/
if (ref_stride == sizeof(u32))
ref.flags = PANFROST_BO_REF_EXCLUSIVE;
if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS))
return -EINVAL;
job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle);
if (!job->bos[i])
return -EINVAL;
job->bo_flags[i] = ref.flags;
- }
- return 0;
+}
+static int +panfrost_get_job_in_syncs(struct drm_file *file_priv,
u64 refs, u32 ref_stride,
u32 count, struct panfrost_job *job)
+{
- const void __user *in = u64_to_user_ptr(refs);
- unsigned int i;
- int ret;
- if (!count)
return 0;
- for (i = 0; i < count; i++) {
struct drm_panfrost_syncobj_ref ref = { };
struct dma_fence *fence;
ret = copy_struct_from_user(&ref, sizeof(ref),
in + (i * ref_stride),
ref_stride);
if (ret)
return ret;
if (ref.pad)
return -EINVAL;
ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point,
0, &fence);
if (ret)
return ret;
ret = drm_sched_job_add_dependency(&job->base, fence);
if (ret)
return ret;
- }
- return 0;
+}
+struct panfrost_job_out_sync {
- struct drm_syncobj *syncobj;
- struct dma_fence_chain *chain;
- u64 point;
+};
+static void +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) +{
- unsigned int i;
- for (i = 0; i < count; i++) {
if (!out_syncs[i].syncobj)
break;
drm_syncobj_put(out_syncs[i].syncobj);
kvfree(out_syncs[i].chain);
- }
- kvfree(out_syncs);
+}
+static struct panfrost_job_out_sync * +panfrost_get_job_out_syncs(struct drm_file *file_priv,
u64 refs, u32 ref_stride,
u32 count)
+{
- void __user *in = u64_to_user_ptr(refs);
- struct panfrost_job_out_sync *out_syncs;
- unsigned int i;
- int ret;
- if (!count)
return NULL;
- /* If the syncobj ref_stride == sizeof(u32) we are called from the
* old submit ioctl() which only accepted one out syncobj. In that
* case the syncobj handle is passed directly through the
* ->out_syncs field, so let's make sure the refs fits in a u32.
*/
- if (ref_stride == sizeof(u32) &&
(count != 1 || refs > UINT_MAX))
return ERR_PTR(-EINVAL);
- out_syncs = kvmalloc_array(count, sizeof(*out_syncs),
GFP_KERNEL | __GFP_ZERO);
- if (!out_syncs)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < count; i++) {
struct drm_panfrost_syncobj_ref ref = { };
if (ref_stride == sizeof(u32)) {
/* Special case for the old submit wrapper: in that
* case there's only one out_sync, and the syncobj
* handle is passed directly in the out_syncs field.
*/
ref.handle = refs;
} else {
ret = copy_struct_from_user(&ref, sizeof(ref),
in + (i * ref_stride),
ref_stride);
if (ret)
goto err_free_out_syncs;
}
if (ref.pad) {
ret = -EINVAL;
goto err_free_out_syncs;
}
out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle);
if (!out_syncs[i].syncobj) {
ret = -ENODEV;
goto err_free_out_syncs;
}
out_syncs[i].point = ref.point;
if (!out_syncs[i].point)
continue;
out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain),
GFP_KERNEL);
if (!out_syncs[i].chain) {
ret = -ENOMEM;
goto err_free_out_syncs;
}
- }
- return out_syncs;
+err_free_out_syncs:
- panfrost_put_job_out_syncs(out_syncs, count);
- return ERR_PTR(ret);
+}
+static void +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs,
unsigned int count, struct dma_fence *fence)
+{
- unsigned int i;
- for (i = 0; i < count; i++) {
if (out_syncs[i].chain) {
drm_syncobj_add_point(out_syncs[i].syncobj,
out_syncs[i].chain,
fence, out_syncs[i].point);
out_syncs[i].chain = NULL;
} else {
drm_syncobj_replace_fence(out_syncs[i].syncobj,
fence);
}
- }
+}
+struct panfrost_submit_ioctl_version_info {
- u32 job_stride;
- u32 bo_ref_stride;
- u32 syncobj_ref_stride;
+};
+static const struct panfrost_submit_ioctl_version_info submit_versions[] = {
- /* SUBMIT */
- [0] = { 0, 4, 4 },
- /* BATCH_SUBMIT v1 */
- [1] = { 48, 8, 16 },
+};
+#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS
+static int +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv,
struct panfrost_submitqueue *queue,
const struct drm_panfrost_job *args,
u32 version)
+{
- struct panfrost_device *pfdev = dev->dev_private;
- struct panfrost_job_out_sync *out_syncs;
- u32 bo_stride, syncobj_stride;
- struct panfrost_job *job;
- int ret, slot;
- if (args->requirements & ~PANFROST_JD_ALLOWED_REQS)
return -EINVAL;
- if (!args->head)
return -EINVAL;
- bo_stride = submit_versions[version].bo_ref_stride;
- syncobj_stride = submit_versions[version].syncobj_ref_stride;
- job = kzalloc(sizeof(*job), GFP_KERNEL);
- if (!job)
return -ENOMEM;
- kref_init(&job->refcount);
- job->pfdev = pfdev;
- job->jc = args->head;
- job->requirements = args->requirements;
- job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
- job->file_priv = file_priv->driver_priv;
- slot = panfrost_job_get_slot(job);
- ret = drm_sched_job_init(&job->base,
&queue->sched_entity[slot],
NULL);
- if (ret)
goto out_put_job;
- ret = panfrost_get_job_in_syncs(file_priv,
args->in_syncs,
syncobj_stride,
args->in_sync_count,
job);
- if (ret)
goto out_cleanup_job;
- out_syncs = panfrost_get_job_out_syncs(file_priv,
args->out_syncs,
syncobj_stride,
args->out_sync_count);
- if (IS_ERR(out_syncs)) {
ret = PTR_ERR(out_syncs);
goto out_cleanup_job;
- }
- ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride,
args->bo_count, job);
- if (ret)
goto out_put_outsyncs;
- ret = panfrost_get_job_mappings(file_priv, job);
- if (ret)
goto out_put_outsyncs;
- ret = panfrost_job_push(queue, job);
- if (ret)
goto out_put_outsyncs;
- panfrost_set_job_out_fence(out_syncs, args->out_sync_count,
job->render_done_fence);
+out_put_outsyncs:
- panfrost_put_job_out_syncs(out_syncs, args->out_sync_count);
+out_cleanup_job:
- if (ret)
drm_sched_job_cleanup(&job->base);
+out_put_job:
- panfrost_job_put(job);
- return ret;
+}
+static int +panfrost_ioctl_submit(struct drm_device *dev, void *data,
struct drm_file *file)
+{
- struct drm_panfrost_submit *args = data;
- struct drm_panfrost_job job_args = {
.head = args->jc,
.bos = args->bo_handles,
.in_syncs = args->in_syncs,
/* We are abusing .out_syncs and passing the handle directly
* instead of a pointer to a user u32 array, but
* panfrost_job_submit() knows about it, so it's fine.
*/
.out_syncs = args->out_sync,
.in_sync_count = args->in_sync_count,
.out_sync_count = args->out_sync > 0 ? 1 : 0,
.bo_count = args->bo_handle_count,
.requirements = args->requirements
- };
- struct panfrost_submitqueue *queue;
- int ret;
- queue = panfrost_submitqueue_get(file->driver_priv, 0);
- if (IS_ERR(queue))
return PTR_ERR(queue);
- ret = panfrost_submit_job(dev, file, queue, &job_args, 0);
- panfrost_submitqueue_put(queue);
- return ret;
+}
+static int +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data,
struct drm_file *file_priv)
+{
- struct drm_panfrost_batch_submit *args = data;
- void __user *jobs_args = u64_to_user_ptr(args->jobs);
- struct panfrost_submitqueue *queue;
- u32 version = args->version;
- u32 job_stride;
- unsigned int i;
- int ret = 0;
- /* Make sure the SUBMIT_BATCH_VERSION and desc sizes are synced */
- BUILD_BUG_ON(sizeof(struct drm_panfrost_job) !=
submit_versions[PANFROST_SUBMIT_BATCH_VERSION].job_stride);
- BUILD_BUG_ON(sizeof(struct drm_panfrost_bo_ref) !=
submit_versions[PANFROST_SUBMIT_BATCH_VERSION].bo_ref_stride);
- BUILD_BUG_ON(sizeof(struct drm_panfrost_syncobj_ref) !=
submit_versions[PANFROST_SUBMIT_BATCH_VERSION].syncobj_ref_stride);
- /* Version 0 doesn't exists (it's reserved for the SUBMIT ioctl) */
- if (!version)
return -EINVAL;
- /* If the version specified is bigger than what we currently support,
* pick the last supported version and let copy_struct_from_user()
* check that any extra job, bo_ref and syncobj_ref fields are zeroed.
*/
- if (version >= ARRAY_SIZE(submit_versions))
version = ARRAY_SIZE(submit_versions) - 1;
- queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue);
- if (IS_ERR(queue))
return PTR_ERR(queue);
- job_stride = submit_versions[version].job_stride;
- for (i = 0; i < args->job_count; i++) {
struct drm_panfrost_job job_args = { };
ret = copy_struct_from_user(&job_args, sizeof(job_args),
jobs_args + (i * job_stride),
job_stride);
if (!ret) {
ret = panfrost_submit_job(dev, file_priv, queue,
&job_args, version);
}
if (ret) {
args->fail_idx = i;
break;
}
- }
- panfrost_submitqueue_put(queue);
- return ret;
+}
int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -590,6 +799,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW),
- PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW),
};
DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 62e010307afc..0367cee8f6df 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -254,7 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) return ret; }
/* panfrost always uses write mode in its current uapi */
if ((job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) && !exclusive)
continue;
- ret = drm_sched_job_add_implicit_dependencies(&job->base, job->bos[i], exclusive); if (ret)
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 66e1c2f1ff4b..5e3f8a344f41 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -23,6 +23,7 @@ extern "C" { #define DRM_PANFROST_MADVISE 0x08 #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a +#define DRM_PANFROST_BATCH_SUBMIT 0x0b
#define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -33,6 +34,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit)
/*
- Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -243,9 +245,99 @@ struct drm_panfrost_create_submitqueue { __u32 id; /* out, identifier */ };
+/* Syncobj reference passed at job submission time to encode explicit
- input/output fences.
- */
+struct drm_panfrost_syncobj_ref {
- /** Syncobj handle */
- __u32 handle;
- /** Padding field, must be set to 0 */
- __u32 pad;
- /**
* For timeline syncobjs, the point on the timeline the reference
* points to. 0 for the last point.
* Must be set to 0 for non-timeline syncobjs
*/
- __u64 point;
+};
/* Exclusive (AKA write) access to the BO */ #define PANFROST_BO_REF_EXCLUSIVE 0x1
+/* Disable the implicit dependency on the BO fence */ +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2
+/* Describes a BO referenced by a job and the type of access. */ +struct drm_panfrost_bo_ref {
- /** A GEM handle */
- __u32 handle;
- /** A combination of PANFROST_BO_REF_x flags */
- __u32 flags;
+};
+/* Describes a GPU job and the resources attached to it. */ +struct drm_panfrost_job {
- /** GPU pointer to the head of the job chain. */
- __u64 head;
- /**
* Array of drm_panfrost_bo_ref objects describing the BOs referenced
* by this job.
*/
- __u64 bos;
- /**
* Arrays of drm_panfrost_syncobj_ref objects describing the input
* and output fences.
*/
- __u64 in_syncs;
- __u64 out_syncs;
- /** Syncobj reference array sizes. */
- __u32 in_sync_count;
- __u32 out_sync_count;
- /** BO reference array size. */
- __u32 bo_count;
- /** Combination of PANFROST_JD_REQ_* flags. */
- __u32 requirements;
+};
+#define PANFROST_SUBMIT_BATCH_VERSION 1
+/* Used to submit multiple jobs in one call */ +struct drm_panfrost_batch_submit {
- /**
* Always set to PANFROST_SUBMIT_BATCH_VERSION. This is used to let the
* kernel know about the size of the various structs passed to the
* BATCH_SUBMIT ioctl.
*/
- __u32 version;
- /** Number of jobs to submit. */
- __u32 job_count;
- /* Pointer to a job array. */
- __u64 jobs;
- /**
* ID of the queue to submit those jobs to. 0 is the default
* submit queue and should always exists. If you need a dedicated
* queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE.
*/
- __u32 queue;
- /**
* If the submission fails, this encodes the index of the first job
* that failed.
*/
- __u32 fail_idx;
+};
#if defined(__cplusplus) } #endif