On 13/09/2019 12:17, Boris Brezillon wrote:
> The READ/WRITE flags are particularly useful if we want to avoid
> serialization of jobs that read from the same BO but never write to
> it. The NO_IMPLICIT_FENCE flag might be useful when the user knows the
> BO is shared but jobs are using different portions of the buffer.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Good feature - we could do with an (easy) way of the user driver detecting this - so it might be worth bumping the driver version for this?
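Something like this on the userspace side is roughly what I have in mind
(untested, and assuming the minor ends up being bumped to 2 for this -
the exact number doesn't matter as long as it's documented):

	#include <stdbool.h>
	#include <xf86drm.h>

	/* Query the kernel driver version and check whether per-BO
	 * submit flags are available (minor number is hypothetical). */
	static bool panfrost_has_bo_descs(int fd)
	{
		drmVersionPtr version = drmGetVersion(fd);
		bool supported;

		if (!version)
			return false;

		supported = version->version_major == 1 &&
			    version->version_minor >= 2;
		drmFreeVersion(version);

		return supported;
	}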
Some more comments below.
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
>  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
>  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
>  include/uapi/drm/panfrost_drm.h         |  41 ++++++
>  4 files changed, 247 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index d74442d71048..08082fd557c3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -119,20 +119,76 @@
>  panfrost_lookup_bos(struct drm_device *dev,
>  		    struct drm_file *file_priv,
>  		    struct drm_panfrost_submit *args,
>  		    struct panfrost_job *job)
>  {
> -	job->bo_count = args->bo_handle_count;
> +	struct drm_panfrost_submit_bo *bo_descs = NULL;
> +	u32 *handles = NULL;
> +	u32 i, bo_count;
> +	int ret = 0;
>
> -	if (!job->bo_count)
> +	bo_count = args->bo_desc_count ?
> +		   args->bo_desc_count : args->bo_handle_count;
> +	if (!bo_count)
>  		return 0;
>
> -	job->implicit_fences = kvmalloc_array(job->bo_count,
> -					      sizeof(struct dma_fence *),
> -					      GFP_KERNEL | __GFP_ZERO);
> -	if (!job->implicit_fences)
> +	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
> +				  GFP_KERNEL | __GFP_ZERO);
> +	if (!job->bos)
>  		return -ENOMEM;
>
> -	return drm_gem_objects_lookup(file_priv,
> -				      (void __user *)(uintptr_t)args->bo_handles,
> -				      job->bo_count, &job->bos);
> +	job->bo_count = bo_count;
> +	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> +				  GFP_KERNEL | __GFP_ZERO);
> +	if (!bo_descs) {
> +		ret = -ENOMEM;
> +		goto out;
This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
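I.e. just:

	if (!bo_descs)
		return -ENOMEM;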
> +	}
> +
> +	if (!args->bo_desc_count) {
> +		handles = kvmalloc_array(bo_count, sizeof(*handles),
> +					 GFP_KERNEL);
> +		if (!handles) {
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +
> +		if (copy_from_user(handles,
> +				   (void __user *)(uintptr_t)args->bo_handles,
> +				   job->bo_count * sizeof(*handles))) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < job->bo_count; i++) {
> +			bo_descs[i].handle = handles[i];
> +			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> +					    PANFROST_SUBMIT_BO_READ;
You can use PANFROST_SUBMIT_BO_RW here.
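I.e.:

	bo_descs[i].flags = PANFROST_SUBMIT_BO_RW;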
> +		}
> +	} else if (copy_from_user(bo_descs,
> +				  (void __user *)(uintptr_t)args->bo_descs,
> +				  job->bo_count * sizeof(*bo_descs))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> +		    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		job->bos[i].flags = bo_descs[i].flags;
> +		job->bos[i].obj = drm_gem_object_lookup(file_priv,
> +							bo_descs[i].handle);
> +		if (!job->bos[i].obj) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	kvfree(handles);
> +	kvfree(bo_descs);
> +	return ret;
>  }
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 05c85f45a0de..e4b74fde9339 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>
> -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> -					   int bo_count,
> -					   struct dma_fence **implicit_fences)
> +static int panfrost_acquire_object_fences(struct panfrost_job *job)
>  {
> -	int i;
> +	int i, ret;
>
> -	for (i = 0; i < bo_count; i++)
> -		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct panfrost_job_bo_desc *bo = &job->bos[i];
> +		struct dma_resv *robj = bo->obj->resv;
> +
> +		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> +			ret = dma_resv_reserve_shared(robj, 1);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +			continue;
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> +			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> +						      &bo->shared_count,
> +						      &bo->shared);
> +			if (ret)
> +				return ret;
> +		} else {
> +			bo->excl = dma_resv_get_excl_rcu(robj);
> +		}
The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't understand why.
> +	}
> +
> +	return 0;
>  }
>
> -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> -					   int bo_count,
> -					   struct dma_fence *fence)
> +static void panfrost_attach_object_fences(struct panfrost_job *job)
>  {
>  	int i;
>
> -	for (i = 0; i < bo_count; i++)
> -		dma_resv_add_excl_fence(bos[i]->resv, fence);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct drm_gem_object *obj = job->bos[i].obj;
> +
> +		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> +			dma_resv_add_excl_fence(obj->resv,
> +						job->render_done_fence);
> +		else
> +			dma_resv_add_shared_fence(obj->resv,
> +						  job->render_done_fence);
> +	}
> +}
> +
> +static int panfrost_job_lock_bos(struct panfrost_job *job,
> +				 struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int contended = -1;
> +	int i, ret;
> +
> +	ww_acquire_init(acquire_ctx, &reservation_ww_class);
> +
> +retry:
> +	if (contended != -1) {
> +		struct drm_gem_object *obj = job->bos[contended].obj;
> +
> +		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> +						       acquire_ctx);
dma_resv_lock_slow_interruptible()?
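I.e. (assuming the dma_resv wrappers do what I think they do):

	ret = dma_resv_lock_slow_interruptible(obj->resv, acquire_ctx);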
> +		if (ret) {
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if (i == contended)
> +			continue;
> +
> +		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> +						  acquire_ctx);
dma_resv_lock_interruptible()?
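I.e.:

	ret = dma_resv_lock_interruptible(job->bos[i].obj->resv,
					  acquire_ctx);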
> +		if (ret) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> +
> +			if (contended != -1 && contended >= i) {
> +				struct drm_gem_object *contended_obj;
> +
> +				contended_obj = job->bos[contended].obj;
> +				ww_mutex_unlock(&contended_obj->resv->lock);
> +			}
> +
> +			if (ret == -EDEADLK) {
> +				contended = i;
> +				goto retry;
> +			}
> +
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	ww_acquire_done(acquire_ctx);
> +
> +	return 0;
> +}
This looks like a copy of drm_gem_lock_reservations(). The only reason for it as far as I can see is because we now have an array of struct panfrost_job_bo_desc rather than a direct array of struct drm_gem_object. I'm not sure having everything neatly in one structure is worth this cost?
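E.g. keeping the GEM objects in their own array (field names below are
just for illustration) would let the existing helpers keep working:

	/* In struct panfrost_job: */
	struct drm_gem_object **bos;		/* what the helpers expect */
	struct panfrost_job_bo_desc *bo_descs;	/* flags + fences, same index */
	u32 bo_count;

	/* ...and panfrost_job_push() stays on the generic paths: */
	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	/* ... */
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);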
> +static void panfrost_job_unlock_bos(struct panfrost_job *job,
> +				    struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int i;
> +
> +	for (i = 0; i < job->bo_count; i++)
> +		ww_mutex_unlock(&job->bos[i].obj->resv->lock);
> +
> +	ww_acquire_fini(acquire_ctx);
> +}
>
> @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
>
>  	mutex_lock(&pfdev->sched_lock);
>
> -	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
> -					&acquire_ctx);
> +	ret = panfrost_job_lock_bos(job, &acquire_ctx);
>  	if (ret) {
>  		mutex_unlock(&pfdev->sched_lock);
>  		return ret;
> @@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
>
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>
> -	panfrost_acquire_object_fences(job->bos, job->bo_count,
> -				       job->implicit_fences);
> +	panfrost_acquire_object_fences(job);
>
>  	drm_sched_entity_push_job(&job->base, entity);
>
>  	mutex_unlock(&pfdev->sched_lock);
>
> -	panfrost_attach_object_fences(job->bos, job->bo_count,
> -				      job->render_done_fence);
> +	panfrost_attach_object_fences(job);
>
>  unlock:
> -	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
> +	panfrost_job_unlock_bos(job, &acquire_ctx);
>
>  	return ret;
>  }
>
> @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
>  			dma_fence_put(job->in_fences[i]);
>  		kvfree(job->in_fences);
>  	}
> -	if (job->implicit_fences) {
> -		for (i = 0; i < job->bo_count; i++)
> -			dma_fence_put(job->implicit_fences[i]);
> -		kvfree(job->implicit_fences);
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		unsigned int j;
> +
> +		dma_fence_put(job->bos[i].excl);
> +		for (j = 0; j < job->bos[i].shared_count; j++)
> +			dma_fence_put(job->bos[i].shared[j]);
>  	}
> +
>  	dma_fence_put(job->done_fence);
>  	dma_fence_put(job->render_done_fence);
>
> -	if (job->bos) {
> -		for (i = 0; i < job->bo_count; i++)
> -			drm_gem_object_put_unlocked(job->bos[i]);
> -		kvfree(job->bos);
> -	}
> +	for (i = 0; i < job->bo_count; i++)
> +		drm_gem_object_put_unlocked(job->bos[i].obj);
> +	kvfree(job->bos);
>
>  	kfree(job);
>  }
>
> @@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job)
>  		}
>  	}
>
> -	/* Implicit fences, max. one per BO */
> +	/* Implicit fences */
>  	for (i = 0; i < job->bo_count; i++) {
> -		if (job->implicit_fences[i]) {
> -			fence = job->implicit_fences[i];
> -			job->implicit_fences[i] = NULL;
> +		unsigned int j;
> +
> +		if (job->bos[i].excl) {
> +			fence = job->bos[i].excl;
> +			job->bos[i].excl = NULL;
>  			return fence;
>  		}
> +
> +		for (j = 0; j < job->bos[i].shared_count; j++) {
> +			if (!job->bos[i].shared[j])
> +				continue;
> +
> +			fence = job->bos[i].shared[j];
> +			job->bos[i].shared[j] = NULL;
> +			return fence;
> +		}
>  	}
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 62454128a792..53440640a6e3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -11,6 +11,14 @@ struct panfrost_device;
>  struct panfrost_gem_object;
>  struct panfrost_file_priv;
>
> +struct panfrost_job_bo_desc {
> +	struct drm_gem_object *obj;
> +	struct dma_fence *excl;
> +	struct dma_fence **shared;
> +	unsigned int shared_count;
> +	u32 flags;
> +};
> +
>  struct panfrost_job {
>  	struct drm_sched_job base;
>
> @@ -31,8 +39,7 @@ struct panfrost_job {
>  	__u32 flush_id;
>
>  	/* Exclusive fences we have taken from the BOs to wait for */
Comment needs updating
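Maybe something like:

	/* Fence/flag information for each BO referenced by the job */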
> -	struct dma_fence **implicit_fences;
> -	struct drm_gem_object **bos;
> +	struct panfrost_job_bo_desc *bos;
>  	u32 bo_count;
>
>  	/* Fence to be signaled by drm-sched once its done with the job */
>
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index ec19db1eead8..029c6ae1b1f0 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -40,6 +40,28 @@ extern "C" {
>  #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
>
>  #define PANFROST_JD_REQ_FS (1 << 0)
> +
> +#define PANFROST_SUBMIT_BO_READ			(1 << 0)
> +#define PANFROST_SUBMIT_BO_WRITE		(1 << 1)
> +#define PANFROST_SUBMIT_BO_RW			(PANFROST_SUBMIT_BO_READ | \
> +						 PANFROST_SUBMIT_BO_WRITE)
> +#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE	(1 << 2)
> +#define PANFROST_SUBMIT_BO_VALID_FLAGS \
> +	(PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +
> +/**
> + * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
> + *
> + * Useful to give detailed information about BOs used by a given job.
> + *
> + * @handle: the GEM handle
> + * @flags: a combination of PANFROST_SUBMIT_BO_*
> + */
> +struct drm_panfrost_submit_bo {
> +	__u32 handle;
> +	__u32 flags;
> +};
> +
>  /**
>   * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
>   * engine.
> @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
>  	/** A combination of PANFROST_JD_REQ_* */
>  	__u32 requirements;
>
> +	/**
> +	 * Pointer to an array of &drm_panfrost_submit_bo objects. This
> +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> +	 * not provide enough information to relax synchronization between
> +	 * jobs that only read the BO they share. When both
> +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> +	 */
> +	__u64 bo_descs;
> +
> +	/**
> +	 * Number of BO descriptors passed in (size is that times
> +	 * sizeof(struct drm_panfrost_submit_bo)).
> +	 */
> +	__u32 bo_desc_count;
We don't really need another count field. bo_handle_count could be re-used. Indeed this could even be handled with just a flags field with a new flag specifying that bo_handles no longer points to handles but to bo_desc objects instead.
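Something along these lines (PANFROST_SUBMIT_HAS_BO_DESCS is a made-up
name):

	#define PANFROST_SUBMIT_HAS_BO_DESCS	(1 << 0)

	/* In struct drm_panfrost_submit: bo_handles points to an array of
	 * struct drm_panfrost_submit_bo when PANFROST_SUBMIT_HAS_BO_DESCS
	 * is set, and to plain __u32 handles otherwise; bo_handle_count
	 * covers both cases. */
	__u64 bo_handles;
	__u32 bo_handle_count;

	/* A combination of PANFROST_SUBMIT_* flags. */
	__u32 flags;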
Steve
> +
> +	/** Padding, must be 0. */
> +	__u32 pad;
>  };
>
>  /**