The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it. The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_drv.c | 72 +++++++++-- drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++----- drivers/gpu/drm/panfrost/panfrost_job.h | 11 +- include/uapi/drm/panfrost_drm.h | 41 ++++++ 4 files changed, 247 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d74442d71048..08082fd557c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev, struct drm_panfrost_submit *args, struct panfrost_job *job) { - job->bo_count = args->bo_handle_count; + struct drm_panfrost_submit_bo *bo_descs = NULL; + u32 *handles = NULL; + u32 i, bo_count; + int ret = 0;
- if (!job->bo_count) + bo_count = args->bo_desc_count ? + args->bo_desc_count : args->bo_handle_count; + if (!bo_count) return 0;
- job->implicit_fences = kvmalloc_array(job->bo_count, - sizeof(struct dma_fence *), + job->bos = kvmalloc_array(bo_count, sizeof(*job->bos), GFP_KERNEL | __GFP_ZERO); - if (!job->implicit_fences) + if (!job->bos) return -ENOMEM;
- return drm_gem_objects_lookup(file_priv, - (void __user *)(uintptr_t)args->bo_handles, - job->bo_count, &job->bos); + job->bo_count = bo_count; + bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs), + GFP_KERNEL | __GFP_ZERO); + if (!bo_descs) { + ret = -ENOMEM; + goto out; + } + + if (!args->bo_desc_count) { + handles = kvmalloc_array(bo_count, sizeof(*handles), + GFP_KERNEL); + if (!handles) { + ret =-ENOMEM; + goto out; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + job->bo_count * sizeof(*handles))) { + ret = -EFAULT; + goto out; + } + + for (i = 0; i < job->bo_count; i++) { + bo_descs[i].handle = handles[i]; + bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE | + PANFROST_SUBMIT_BO_READ; + } + } else if (copy_from_user(bo_descs, + (void __user *)(uintptr_t)args->bo_descs, + job->bo_count * sizeof(*bo_descs))) { + ret = -EFAULT; + goto out; + } + + for (i = 0; i < job->bo_count; i++) { + if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) || + !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) { + ret = -EINVAL; + goto out; + } + + job->bos[i].flags = bo_descs[i].flags; + job->bos[i].obj = drm_gem_object_lookup(file_priv, + bo_descs[i].handle); + if (!job->bos[i].obj) { + ret = -ENOENT; + goto out; + } + } + +out: + kvfree(handles); + kvfree(bo_descs); + return ret; }
/** diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 05c85f45a0de..e4b74fde9339 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) pm_runtime_put_autosuspend(pfdev->dev); }
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos, - int bo_count, - struct dma_fence **implicit_fences) +static int panfrost_acquire_object_fences(struct panfrost_job *job) { - int i; + int i, ret;
- for (i = 0; i < bo_count; i++) - implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv); + for (i = 0; i < job->bo_count; i++) { + struct panfrost_job_bo_desc *bo = &job->bos[i]; + struct dma_resv *robj = bo->obj->resv; + + if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) { + ret = dma_resv_reserve_shared(robj, 1); + if (ret) + return ret; + } + + if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE) + continue; + + if (bo->flags & PANFROST_SUBMIT_BO_WRITE) { + ret = dma_resv_get_fences_rcu(robj, &bo->excl, + &bo->shared_count, + &bo->shared); + if (ret) + return ret; + } else { + bo->excl = dma_resv_get_excl_rcu(robj); + } + } + + return 0; }
-static void panfrost_attach_object_fences(struct drm_gem_object **bos, - int bo_count, - struct dma_fence *fence) +static void panfrost_attach_object_fences(struct panfrost_job *job) { int i;
- for (i = 0; i < bo_count; i++) - dma_resv_add_excl_fence(bos[i]->resv, fence); + for (i = 0; i < job->bo_count; i++) { + struct drm_gem_object *obj = job->bos[i].obj; + + if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE) + dma_resv_add_excl_fence(obj->resv, + job->render_done_fence); + else + dma_resv_add_shared_fence(obj->resv, + job->render_done_fence); + } +} + +static int panfrost_job_lock_bos(struct panfrost_job *job, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended = -1; + int i, ret; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended != -1) { + struct drm_gem_object *obj = job->bos[contended].obj; + + ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < job->bo_count; i++) { + if (i == contended) + continue; + + ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock, + acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) + ww_mutex_unlock(&job->bos[j].obj->resv->lock); + + if (contended != -1 && contended >= i) { + struct drm_gem_object *contended_obj; + + contended_obj = job->bos[contended].obj; + ww_mutex_unlock(&contended_obj->resv->lock); + } + + if (ret == -EDEADLK) { + contended = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } + } + + ww_acquire_done(acquire_ctx); + + return 0; +} + +static void panfrost_job_unlock_bos(struct panfrost_job *job, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < job->bo_count; i++) + ww_mutex_unlock(&job->bos[i].obj->resv->lock); + + ww_acquire_fini(acquire_ctx); }
int panfrost_job_push(struct panfrost_job *job) @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
mutex_lock(&pfdev->sched_lock);
- ret = drm_gem_lock_reservations(job->bos, job->bo_count, - &acquire_ctx); + ret = panfrost_job_lock_bos(job, &acquire_ctx); if (ret) { mutex_unlock(&pfdev->sched_lock); return ret; @@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
kref_get(&job->refcount); /* put by scheduler job completion */
- panfrost_acquire_object_fences(job->bos, job->bo_count, - job->implicit_fences); + panfrost_acquire_object_fences(job);
drm_sched_entity_push_job(&job->base, entity);
mutex_unlock(&pfdev->sched_lock);
- panfrost_attach_object_fences(job->bos, job->bo_count, - job->render_done_fence); + panfrost_attach_object_fences(job);
unlock: - drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx); + panfrost_job_unlock_bos(job, &acquire_ctx);
return ret; } @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref) dma_fence_put(job->in_fences[i]); kvfree(job->in_fences); } - if (job->implicit_fences) { - for (i = 0; i < job->bo_count; i++) - dma_fence_put(job->implicit_fences[i]); - kvfree(job->implicit_fences); + + for (i = 0; i < job->bo_count; i++) { + unsigned int j; + + dma_fence_put(job->bos[i].excl); + for (j = 0; j < job->bos[i].shared_count; j++) + dma_fence_put(job->bos[i].shared[j]); } + dma_fence_put(job->done_fence); dma_fence_put(job->render_done_fence);
- if (job->bos) { - for (i = 0; i < job->bo_count; i++) - drm_gem_object_put_unlocked(job->bos[i]); - kvfree(job->bos); - } + for (i = 0; i < job->bo_count; i++) + drm_gem_object_put_unlocked(job->bos[i].obj);
+ kvfree(job->bos); kfree(job); }
@@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job } }
- /* Implicit fences, max. one per BO */ + /* Implicit fences */ for (i = 0; i < job->bo_count; i++) { - if (job->implicit_fences[i]) { - fence = job->implicit_fences[i]; - job->implicit_fences[i] = NULL; + unsigned int j; + + if (job->bos[i].excl) { + fence = job->bos[i].excl; + job->bos[i].excl = NULL; + return fence; + } + + for (j = 0; j < job->bos[i].shared_count; j++) { + if (!job->bos[i].shared[j]) + continue; + + fence = job->bos[i].shared[j]; + job->bos[i].shared[j] = NULL; return fence; } } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index 62454128a792..53440640a6e3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -11,6 +11,14 @@ struct panfrost_device; struct panfrost_gem_object; struct panfrost_file_priv;
+struct panfrost_job_bo_desc { + struct drm_gem_object *obj; + struct dma_fence *excl; + struct dma_fence **shared; + unsigned int shared_count; + u32 flags; +}; + struct panfrost_job { struct drm_sched_job base;
@@ -31,8 +39,7 @@ struct panfrost_job { __u32 flush_id;
/* Exclusive fences we have taken from the BOs to wait for */ - struct dma_fence **implicit_fences; - struct drm_gem_object **bos; + struct panfrost_job_bo_desc *bos; u32 bo_count;
/* Fence to be signaled by drm-sched once its done with the job */ diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index ec19db1eead8..029c6ae1b1f0 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -40,6 +40,28 @@ extern "C" { #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
#define PANFROST_JD_REQ_FS (1 << 0) + +#define PANFROST_SUBMIT_BO_READ (1 << 0) +#define PANFROST_SUBMIT_BO_WRITE (1 << 1) +#define PANFROST_SUBMIT_BO_RW (PANFROST_SUBMIT_BO_READ | \ + PANFROST_SUBMIT_BO_WRITE) +#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE (1 << 2) +#define PANFROST_SUBMIT_BO_VALID_FLAGS \ + (PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE) + +/** + * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl. + * + * Useful to give detailed information about BOs used by a given job. + * + * @handle: the GEM handle + * @flags: a combination of PANFROST_SUBMIT_BO_* + */ +struct drm_panfrost_submit_bo { + __u32 handle; + __u32 flags; +}; + /** * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D * engine. @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
/** A combination of PANFROST_JD_REQ_* */ __u32 requirements; + + /** + * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This + * field is meant to replace &drm_panfrost_submit.bo_handles which did + * not provide enough information to relax synchronization between + * jobs that only read the BO they share. When both + * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs + * are provided, drm_panfrost_submit.bo_handles is ignored. + */ + __u64 bo_descs; + + /** + * Number of BO descriptors passed in (size is that times + * sizeof(drm_panfrost_submit_bo_desc)). + */ + __u32 bo_desc_count; + + /** Padding, must be 0. */ + __u32 pad; };
/**
So we can choose to wait for all BO users, or just for writers.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++-- include/uapi/drm/panfrost_drm.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 08082fd557c3..6a94aea2147f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, struct drm_panfrost_wait_bo *args = data; struct drm_gem_object *gem_obj; unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
if (args->pad) return -EINVAL;
+ if (args->flags & ~PANFROST_WAIT_BO_WRITERS) + return -EINVAL; + gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) return -ENOENT;
- ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true, - true, timeout); + ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all, + true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY;
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 029c6ae1b1f0..ac4facbcee47 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -111,6 +111,9 @@ struct drm_panfrost_submit { __u32 pad; };
+#define PANFROST_WAIT_ALL_BO_USERS (0 << 0) +#define PANFROST_WAIT_BO_WRITERS (1 << 0) + /** * struct drm_panfrost_wait_bo - ioctl argument for waiting for * completion of the last DRM_PANFROST_SUBMIT on a BO. @@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo { __u32 handle; __u32 pad; __s64 timeout_ns; /* absolute */ + __u64 flags; };
#define PANFROST_BO_NOEXEC 1
On 13/09/2019 12:17, Boris Brezillon wrote:
So we can choose to wait for all BO users, or just for writers.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Looks good to me:
Reviewed-by: Steven Price steven.price@arm.com
Although I don't know if the term "writers" should be in the API or if "exclusive" is the preferred term?
Steve
drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++-- include/uapi/drm/panfrost_drm.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 08082fd557c3..6a94aea2147f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, struct drm_panfrost_wait_bo *args = data; struct drm_gem_object *gem_obj; unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
if (args->pad) return -EINVAL;
if (args->flags & ~PANFROST_WAIT_BO_WRITERS)
return -EINVAL;
gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) return -ENOENT;
- ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true,
true, timeout);
- ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all,
true, timeout);
if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY;
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 029c6ae1b1f0..ac4facbcee47 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -111,6 +111,9 @@ struct drm_panfrost_submit { __u32 pad; };
+#define PANFROST_WAIT_ALL_BO_USERS (0 << 0) +#define PANFROST_WAIT_BO_WRITERS (1 << 0)
/**
- struct drm_panfrost_wait_bo - ioctl argument for waiting for
- completion of the last DRM_PANFROST_SUBMIT on a BO.
@@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo { __u32 handle; __u32 pad; __s64 timeout_ns; /* absolute */
- __u64 flags;
};
#define PANFROST_BO_NOEXEC 1
On Fri, 13 Sep 2019 14:46:46 +0100 Steven Price steven.price@arm.com wrote:
On 13/09/2019 12:17, Boris Brezillon wrote:
So we can choose to wait for all BO users, or just for writers.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Looks good to me:
Reviewed-by: Steven Price steven.price@arm.com
Although I don't know if the term "writers" should be in the API or if "exclusive" is the preferred term?
I'll go for PANFROST_WAIT_BO_EXCLUSIVE then.
Thanks,
Boris
Steve
drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++-- include/uapi/drm/panfrost_drm.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 08082fd557c3..6a94aea2147f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, struct drm_panfrost_wait_bo *args = data; struct drm_gem_object *gem_obj; unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
if (args->pad) return -EINVAL;
if (args->flags & ~PANFROST_WAIT_BO_WRITERS)
return -EINVAL;
gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) return -ENOENT;
- ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true,
true, timeout);
- ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all,
true, timeout);
if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY;
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 029c6ae1b1f0..ac4facbcee47 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -111,6 +111,9 @@ struct drm_panfrost_submit { __u32 pad; };
+#define PANFROST_WAIT_ALL_BO_USERS (0 << 0) +#define PANFROST_WAIT_BO_WRITERS (1 << 0)
/**
- struct drm_panfrost_wait_bo - ioctl argument for waiting for
- completion of the last DRM_PANFROST_SUBMIT on a BO.
@@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo { __u32 handle; __u32 pad; __s64 timeout_ns; /* absolute */
- __u64 flags;
};
#define PANFROST_BO_NOEXEC 1
On 13/09/2019 12:17, Boris Brezillon wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it. The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Good feature - we could do with an (easy) way of the user driver detecting this - so it might be worth bumping the driver version for this?
Some more comments below.
drivers/gpu/drm/panfrost/panfrost_drv.c | 72 +++++++++-- drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++----- drivers/gpu/drm/panfrost/panfrost_job.h | 11 +- include/uapi/drm/panfrost_drm.h | 41 ++++++ 4 files changed, 247 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d74442d71048..08082fd557c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev, struct drm_panfrost_submit *args, struct panfrost_job *job) {
- job->bo_count = args->bo_handle_count;
- struct drm_panfrost_submit_bo *bo_descs = NULL;
- u32 *handles = NULL;
- u32 i, bo_count;
- int ret = 0;
- if (!job->bo_count)
- bo_count = args->bo_desc_count ?
args->bo_desc_count : args->bo_handle_count;
- if (!bo_count) return 0;
- job->implicit_fences = kvmalloc_array(job->bo_count,
sizeof(struct dma_fence *),
- job->bos = kvmalloc_array(bo_count, sizeof(*job->bos), GFP_KERNEL | __GFP_ZERO);
- if (!job->implicit_fences)
- if (!job->bos) return -ENOMEM;
- return drm_gem_objects_lookup(file_priv,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count, &job->bos);
- job->bo_count = bo_count;
- bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
GFP_KERNEL | __GFP_ZERO);
- if (!bo_descs) {
ret = -ENOMEM;
goto out;
This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
- }
- if (!args->bo_desc_count) {
handles = kvmalloc_array(bo_count, sizeof(*handles),
GFP_KERNEL);
if (!handles) {
ret =-ENOMEM;
goto out;
}
if (copy_from_user(handles,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count * sizeof(*handles))) {
ret = -EFAULT;
goto out;
}
for (i = 0; i < job->bo_count; i++) {
bo_descs[i].handle = handles[i];
bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
PANFROST_SUBMIT_BO_READ;
You can use PANFROST_SUBMIT_BO_RW here.
}
- } else if (copy_from_user(bo_descs,
(void __user *)(uintptr_t)args->bo_descs,
job->bo_count * sizeof(*bo_descs))) {
ret = -EFAULT;
goto out;
- }
- for (i = 0; i < job->bo_count; i++) {
if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
!(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
ret = -EINVAL;
goto out;
}
job->bos[i].flags = bo_descs[i].flags;
job->bos[i].obj = drm_gem_object_lookup(file_priv,
bo_descs[i].handle);
if (!job->bos[i].obj) {
ret = -ENOENT;
goto out;
}
- }
+out:
- kvfree(handles);
- kvfree(bo_descs);
- return ret;
}
/** diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 05c85f45a0de..e4b74fde9339 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) pm_runtime_put_autosuspend(pfdev->dev); }
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence **implicit_fences)
+static int panfrost_acquire_object_fences(struct panfrost_job *job) {
- int i;
- int i, ret;
- for (i = 0; i < bo_count; i++)
implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
- for (i = 0; i < job->bo_count; i++) {
struct panfrost_job_bo_desc *bo = &job->bos[i];
struct dma_resv *robj = bo->obj->resv;
if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
ret = dma_resv_reserve_shared(robj, 1);
if (ret)
return ret;
}
if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
continue;
if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
ret = dma_resv_get_fences_rcu(robj, &bo->excl,
&bo->shared_count,
&bo->shared);
if (ret)
return ret;
} else {
bo->excl = dma_resv_get_excl_rcu(robj);
}
The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't understand why.
- }
- return 0;
}
-static void panfrost_attach_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence *fence)
+static void panfrost_attach_object_fences(struct panfrost_job *job) { int i;
- for (i = 0; i < bo_count; i++)
dma_resv_add_excl_fence(bos[i]->resv, fence);
- for (i = 0; i < job->bo_count; i++) {
struct drm_gem_object *obj = job->bos[i].obj;
if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(obj->resv,
job->render_done_fence);
else
dma_resv_add_shared_fence(obj->resv,
job->render_done_fence);
- }
+}
+static int panfrost_job_lock_bos(struct panfrost_job *job,
struct ww_acquire_ctx *acquire_ctx)
+{
- int contended = -1;
- int i, ret;
- ww_acquire_init(acquire_ctx, &reservation_ww_class);
+retry:
- if (contended != -1) {
struct drm_gem_object *obj = job->bos[contended].obj;
ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
acquire_ctx);
dma_resv_lock_slow_interruptible()?
if (ret) {
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- for (i = 0; i < job->bo_count; i++) {
if (i == contended)
continue;
ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
acquire_ctx);
dma_resv_lock_interruptible()?
if (ret) {
int j;
for (j = 0; j < i; j++)
ww_mutex_unlock(&job->bos[j].obj->resv->lock);
if (contended != -1 && contended >= i) {
struct drm_gem_object *contended_obj;
contended_obj = job->bos[contended].obj;
ww_mutex_unlock(&contended_obj->resv->lock);
}
if (ret == -EDEADLK) {
contended = i;
goto retry;
}
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- ww_acquire_done(acquire_ctx);
- return 0;
+}
This looks like a copy of drm_gem_lock_reservations(). The only reason for it as far as I can see is because we now have an array of struct panfrost_job_bo_desc rather than a direct array of struct drm_gem_object. I'm not sure having everything neatly in one structure is worth this cost?
+static void panfrost_job_unlock_bos(struct panfrost_job *job,
struct ww_acquire_ctx *acquire_ctx)
+{
- int i;
- for (i = 0; i < job->bo_count; i++)
ww_mutex_unlock(&job->bos[i].obj->resv->lock);
- ww_acquire_fini(acquire_ctx);
}> int panfrost_job_push(struct panfrost_job *job) @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
mutex_lock(&pfdev->sched_lock);
- ret = drm_gem_lock_reservations(job->bos, job->bo_count,
&acquire_ctx);
- ret = panfrost_job_lock_bos(job, &acquire_ctx); if (ret) { mutex_unlock(&pfdev->sched_lock); return ret;
@@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
kref_get(&job->refcount); /* put by scheduler job completion */
- panfrost_acquire_object_fences(job->bos, job->bo_count,
job->implicit_fences);
panfrost_acquire_object_fences(job);
drm_sched_entity_push_job(&job->base, entity);
mutex_unlock(&pfdev->sched_lock);
- panfrost_attach_object_fences(job->bos, job->bo_count,
job->render_done_fence);
- panfrost_attach_object_fences(job);
unlock:
- drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
panfrost_job_unlock_bos(job, &acquire_ctx);
return ret;
} @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref) dma_fence_put(job->in_fences[i]); kvfree(job->in_fences); }
- if (job->implicit_fences) {
for (i = 0; i < job->bo_count; i++)
dma_fence_put(job->implicit_fences[i]);
kvfree(job->implicit_fences);
- for (i = 0; i < job->bo_count; i++) {
unsigned int j;
dma_fence_put(job->bos[i].excl);
for (j = 0; j < job->bos[i].shared_count; j++)
dma_fence_put(job->bos[i].shared[j]); }
- dma_fence_put(job->done_fence); dma_fence_put(job->render_done_fence);
- if (job->bos) {
for (i = 0; i < job->bo_count; i++)
drm_gem_object_put_unlocked(job->bos[i]);
kvfree(job->bos);
- }
for (i = 0; i < job->bo_count; i++)
drm_gem_object_put_unlocked(job->bos[i].obj);
kvfree(job->bos); kfree(job);
}
@@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job } }
- /* Implicit fences, max. one per BO */
- /* Implicit fences */ for (i = 0; i < job->bo_count; i++) {
if (job->implicit_fences[i]) {
fence = job->implicit_fences[i];
job->implicit_fences[i] = NULL;
unsigned int j;
if (job->bos[i].excl) {
fence = job->bos[i].excl;
job->bos[i].excl = NULL;
return fence;
}
for (j = 0; j < job->bos[i].shared_count; j++) {
if (!job->bos[i].shared[j])
continue;
fence = job->bos[i].shared[j];
job->bos[i].shared[j] = NULL; return fence; } }
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index 62454128a792..53440640a6e3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -11,6 +11,14 @@ struct panfrost_device; struct panfrost_gem_object; struct panfrost_file_priv;
+struct panfrost_job_bo_desc {
- struct drm_gem_object *obj;
- struct dma_fence *excl;
- struct dma_fence **shared;
- unsigned int shared_count;
- u32 flags;
+};
struct panfrost_job { struct drm_sched_job base;
@@ -31,8 +39,7 @@ struct panfrost_job { __u32 flush_id;
/* Exclusive fences we have taken from the BOs to wait for */
Comment needs updating
- struct dma_fence **implicit_fences;
- struct drm_gem_object **bos;
struct panfrost_job_bo_desc *bos; u32 bo_count;
/* Fence to be signaled by drm-sched once its done with the job */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index ec19db1eead8..029c6ae1b1f0 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -40,6 +40,28 @@ extern "C" { #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
#define PANFROST_JD_REQ_FS (1 << 0)
+#define PANFROST_SUBMIT_BO_READ (1 << 0) +#define PANFROST_SUBMIT_BO_WRITE (1 << 1) +#define PANFROST_SUBMIT_BO_RW (PANFROST_SUBMIT_BO_READ | \
PANFROST_SUBMIT_BO_WRITE)
+#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE (1 << 2) +#define PANFROST_SUBMIT_BO_VALID_FLAGS \
- (PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
+/**
- struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
- Useful to give detailed information about BOs used by a given job.
- @handle: the GEM handle
- @flags: a combination of PANFROST_SUBMIT_BO_*
- */
+struct drm_panfrost_submit_bo {
- __u32 handle;
- __u32 flags;
+};
/**
- struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
- engine.
@@ -68,6 +90,25 @@ struct drm_panfrost_submit {
/** A combination of PANFROST_JD_REQ_* */ __u32 requirements;
- /**
* Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
* field is meant to replace &drm_panfrost_submit.bo_handles which did
* not provide enough information to relax synchronization between
* jobs that only read the BO they share. When both
* &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
* are provided, drm_panfrost_submit.bo_handles is ignored.
*/
- __u64 bo_descs;
- /**
* Number of BO descriptors passed in (size is that times
* sizeof(drm_panfrost_submit_bo_desc)).
*/
- __u32 bo_desc_count;
We don't really need another count field. bo_handle_count could be re-used. Indeed this could even be handled with just a flags field with a new flag specifying that bo_handles no longer points to handles but to bo_desc objects instead.
Steve
- /** Padding, must be 0. */
- __u32 pad;
};
/**
- /**
* Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
* field is meant to replace &drm_panfrost_submit.bo_handles which did
* not provide enough information to relax synchronization between
* jobs that only read the BO they share. When both
* &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
* are provided, drm_panfrost_submit.bo_handles is ignored.
*/
- __u64 bo_descs;
- /**
* Number of BO descriptors passed in (size is that times
* sizeof(drm_panfrost_submit_bo_desc)).
*/
- __u32 bo_desc_count;
We don't really need another count field. bo_handle_count could be re-used. Indeed this could even be handled with just a flags field with a new flag specifying that bo_handles no longer points to handles but to bo_desc objects instead.
This seems like the cleanest approach (also bumping the ABI version).
On Fri, 13 Sep 2019 14:44:14 +0100 Steven Price steven.price@arm.com wrote:
On 13/09/2019 12:17, Boris Brezillon wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it. The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Good feature - we could do with an (easy) way of the user driver detecting this - so it might be worth bumping the driver version for this?
I was trying to support this feature without adding a new feature flag or bumping the minor version, but I guess there's no good reason to do that.
Some more comments below.
drivers/gpu/drm/panfrost/panfrost_drv.c | 72 +++++++++-- drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++----- drivers/gpu/drm/panfrost/panfrost_job.h | 11 +- include/uapi/drm/panfrost_drm.h | 41 ++++++ 4 files changed, 247 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d74442d71048..08082fd557c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev, struct drm_panfrost_submit *args, struct panfrost_job *job) {
- job->bo_count = args->bo_handle_count;
- struct drm_panfrost_submit_bo *bo_descs = NULL;
- u32 *handles = NULL;
- u32 i, bo_count;
- int ret = 0;
- if (!job->bo_count)
- bo_count = args->bo_desc_count ?
args->bo_desc_count : args->bo_handle_count;
- if (!bo_count) return 0;
- job->implicit_fences = kvmalloc_array(job->bo_count,
sizeof(struct dma_fence *),
- job->bos = kvmalloc_array(bo_count, sizeof(*job->bos), GFP_KERNEL | __GFP_ZERO);
- if (!job->implicit_fences)
- if (!job->bos) return -ENOMEM;
- return drm_gem_objects_lookup(file_priv,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count, &job->bos);
- job->bo_count = bo_count;
- bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
GFP_KERNEL | __GFP_ZERO);
- if (!bo_descs) {
ret = -ENOMEM;
goto out;
This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
Will fix that.
- }
- if (!args->bo_desc_count) {
handles = kvmalloc_array(bo_count, sizeof(*handles),
GFP_KERNEL);
if (!handles) {
ret =-ENOMEM;
goto out;
}
if (copy_from_user(handles,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count * sizeof(*handles))) {
ret = -EFAULT;
goto out;
}
for (i = 0; i < job->bo_count; i++) {
bo_descs[i].handle = handles[i];
bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
PANFROST_SUBMIT_BO_READ;
You can use PANFROST_SUBMIT_BO_RW here.
That as well.
}
- } else if (copy_from_user(bo_descs,
(void __user *)(uintptr_t)args->bo_descs,
job->bo_count * sizeof(*bo_descs))) {
ret = -EFAULT;
goto out;
- }
- for (i = 0; i < job->bo_count; i++) {
if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
!(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
ret = -EINVAL;
goto out;
}
job->bos[i].flags = bo_descs[i].flags;
job->bos[i].obj = drm_gem_object_lookup(file_priv,
bo_descs[i].handle);
if (!job->bos[i].obj) {
ret = -ENOENT;
goto out;
}
- }
+out:
- kvfree(handles);
- kvfree(bo_descs);
- return ret;
}
/** diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 05c85f45a0de..e4b74fde9339 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) pm_runtime_put_autosuspend(pfdev->dev); }
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence **implicit_fences)
+static int panfrost_acquire_object_fences(struct panfrost_job *job) {
- int i;
- int i, ret;
- for (i = 0; i < bo_count; i++)
implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
- for (i = 0; i < job->bo_count; i++) {
struct panfrost_job_bo_desc *bo = &job->bos[i];
struct dma_resv *robj = bo->obj->resv;
if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
ret = dma_resv_reserve_shared(robj, 1);
if (ret)
return ret;
}
if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
continue;
if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
ret = dma_resv_get_fences_rcu(robj, &bo->excl,
&bo->shared_count,
&bo->shared);
if (ret)
return ret;
} else {
bo->excl = dma_resv_get_excl_rcu(robj);
}
The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't understand why.
The NO_IMPLICIT_FENCE flag is telling the core we don't want to wait on the fence installed by other jobs on this BO, but other might want to wait on our render-done fence (when CPU accesses are required, or simply because other jobs didn't pass the NO_IMPLICIT fence flag).
- }
- return 0;
}
-static void panfrost_attach_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence *fence)
+static void panfrost_attach_object_fences(struct panfrost_job *job) { int i;
- for (i = 0; i < bo_count; i++)
dma_resv_add_excl_fence(bos[i]->resv, fence);
- for (i = 0; i < job->bo_count; i++) {
struct drm_gem_object *obj = job->bos[i].obj;
if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(obj->resv,
job->render_done_fence);
else
dma_resv_add_shared_fence(obj->resv,
job->render_done_fence);
- }
+}
+static int panfrost_job_lock_bos(struct panfrost_job *job,
struct ww_acquire_ctx *acquire_ctx)
+{
- int contended = -1;
- int i, ret;
- ww_acquire_init(acquire_ctx, &reservation_ww_class);
+retry:
- if (contended != -1) {
struct drm_gem_object *obj = job->bos[contended].obj;
ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
acquire_ctx);
dma_resv_lock_slot_interruptible()?
Yep (I started this on an older kernel version where dma_resv_ helpers didn't exist and apparently forgot to convert a few things when rebasing).
if (ret) {
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- for (i = 0; i < job->bo_count; i++) {
if (i == contended)
continue;
ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
acquire_ctx);
dma_resv_lock_interruptible()?
Ditto
if (ret) {
int j;
for (j = 0; j < i; j++)
ww_mutex_unlock(&job->bos[j].obj->resv->lock);
if (contended != -1 && contended >= i) {
struct drm_gem_object *contended_obj;
contended_obj = job->bos[contended].obj;
ww_mutex_unlock(&contended_obj->resv->lock);
}
if (ret == -EDEADLK) {
contended = i;
goto retry;
}
ww_acquire_done(acquire_ctx);
return ret;
}
- }
- ww_acquire_done(acquire_ctx);
- return 0;
+}
This looks like a copy of drm_gem_lock_reservations(). The only reason for it as far as I can see is because we now have an array of struct panfrost_job_bo_desc rather than a direct array of struct drm_gem_object. I'm not sure having everything neatly in one structure is worth this cost?
I really like the idea of having all BO-related fields put in a separate structure, but okay.
/**
- struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
- engine.
@@ -68,6 +90,25 @@ struct drm_panfrost_submit {
/** A combination of PANFROST_JD_REQ_* */ __u32 requirements;
- /**
* Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
* field is meant to replace &drm_panfrost_submit.bo_handles which did
* not provide enough information to relax synchronization between
* jobs that only read the BO they share. When both
* &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
* are provided, drm_panfrost_submit.bo_handles is ignored.
*/
- __u64 bo_descs;
- /**
* Number of BO descriptors passed in (size is that times
* sizeof(drm_panfrost_submit_bo_desc)).
*/
- __u32 bo_desc_count;
We don't really need another count field. bo_handle_count could be re-used. Indeed this could even be handled with just a flags field with a new flag specifying that bo_handles no longer points to handles but to bo_desc objects instead.
As said above, I was trying to avoid bumping the minor version or adding a feature flag, hence the decision to add 2 separate fields for the BO desc array. I agree, it's probably not the best solution, so I'll add a new _REQ_ flag and bump the minor version as you suggest.
On Fri, Sep 13, 2019 at 8:44 AM Steven Price steven.price@arm.com wrote:
On 13/09/2019 12:17, Boris Brezillon wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it. The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
Good feature - we could do with an (easy) way of the user driver detecting this - so it might be worth bumping the driver version for this?
Some more comments below.
drivers/gpu/drm/panfrost/panfrost_drv.c | 72 +++++++++-- drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++----- drivers/gpu/drm/panfrost/panfrost_job.h | 11 +- include/uapi/drm/panfrost_drm.h | 41 ++++++ 4 files changed, 247 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d74442d71048..08082fd557c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev, struct drm_panfrost_submit *args, struct panfrost_job *job) {
job->bo_count = args->bo_handle_count;
struct drm_panfrost_submit_bo *bo_descs = NULL;
u32 *handles = NULL;
u32 i, bo_count;
int ret = 0;
if (!job->bo_count)
bo_count = args->bo_desc_count ?
args->bo_desc_count : args->bo_handle_count;
if (!bo_count) return 0;
job->implicit_fences = kvmalloc_array(job->bo_count,
sizeof(struct dma_fence *),
job->bos = kvmalloc_array(bo_count, sizeof(*job->bos), GFP_KERNEL | __GFP_ZERO);
if (!job->implicit_fences)
if (!job->bos) return -ENOMEM;
return drm_gem_objects_lookup(file_priv,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count, &job->bos);
job->bo_count = bo_count;
bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
GFP_KERNEL | __GFP_ZERO);
if (!bo_descs) {
ret = -ENOMEM;
goto out;
This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
}
if (!args->bo_desc_count) {
handles = kvmalloc_array(bo_count, sizeof(*handles),
GFP_KERNEL);
if (!handles) {
ret = -ENOMEM;
goto out;
}
if (copy_from_user(handles,
(void __user *)(uintptr_t)args->bo_handles,
job->bo_count * sizeof(*handles))) {
ret = -EFAULT;
goto out;
}
for (i = 0; i < job->bo_count; i++) {
bo_descs[i].handle = handles[i];
bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
PANFROST_SUBMIT_BO_READ;
You can use PANFROST_SUBMIT_BO_RW here.
}
} else if (copy_from_user(bo_descs,
(void __user *)(uintptr_t)args->bo_descs,
job->bo_count * sizeof(*bo_descs))) {
ret = -EFAULT;
goto out;
}
for (i = 0; i < job->bo_count; i++) {
if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
!(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
ret = -EINVAL;
goto out;
}
job->bos[i].flags = bo_descs[i].flags;
job->bos[i].obj = drm_gem_object_lookup(file_priv,
bo_descs[i].handle);
if (!job->bos[i].obj) {
ret = -ENOENT;
goto out;
}
}
+out:
kvfree(handles);
kvfree(bo_descs);
return ret;
}
/** diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 05c85f45a0de..e4b74fde9339 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) pm_runtime_put_autosuspend(pfdev->dev); }
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence **implicit_fences)
+static int panfrost_acquire_object_fences(struct panfrost_job *job) {
int i;
int i, ret;
for (i = 0; i < bo_count; i++)
implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
for (i = 0; i < job->bo_count; i++) {
struct panfrost_job_bo_desc *bo = &job->bos[i];
struct dma_resv *robj = bo->obj->resv;
if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
ret = dma_resv_reserve_shared(robj, 1);
if (ret)
return ret;
}
if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
continue;
if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
ret = dma_resv_get_fences_rcu(robj, &bo->excl,
&bo->shared_count,
&bo->shared);
if (ret)
return ret;
} else {
bo->excl = dma_resv_get_excl_rcu(robj);
}
The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't understand why.
}
return 0;
}
-static void panfrost_attach_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence *fence)
+static void panfrost_attach_object_fences(struct panfrost_job *job) { int i;
for (i = 0; i < bo_count; i++)
dma_resv_add_excl_fence(bos[i]->resv, fence);
for (i = 0; i < job->bo_count; i++) {
struct drm_gem_object *obj = job->bos[i].obj;
if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(obj->resv,
job->render_done_fence);
else
dma_resv_add_shared_fence(obj->resv,
job->render_done_fence);
}
+}
+static int panfrost_job_lock_bos(struct panfrost_job *job,
struct ww_acquire_ctx *acquire_ctx)
+{
int contended = -1;
int i, ret;
ww_acquire_init(acquire_ctx, &reservation_ww_class);
+retry:
if (contended != -1) {
struct drm_gem_object *obj = job->bos[contended].obj;
ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
acquire_ctx);
dma_resv_lock_slot_interruptible()?
if (ret) {
ww_acquire_done(acquire_ctx);
return ret;
}
}
for (i = 0; i < job->bo_count; i++) {
if (i == contended)
continue;
ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
acquire_ctx);
dma_resv_lock_interruptible()?
if (ret) {
int j;
for (j = 0; j < i; j++)
ww_mutex_unlock(&job->bos[j].obj->resv->lock);
if (contended != -1 && contended >= i) {
struct drm_gem_object *contended_obj;
contended_obj = job->bos[contended].obj;
ww_mutex_unlock(&contended_obj->resv->lock);
}
if (ret == -EDEADLK) {
contended = i;
goto retry;
}
ww_acquire_done(acquire_ctx);
return ret;
}
}
ww_acquire_done(acquire_ctx);
return 0;
+}
This looks like a copy of drm_gem_lock_reservations(). The only reason for it as far as I can see is because we now have an array of struct panfrost_job_bo_desc rather than a direct array of struct drm_gem_object. I'm not sure having everything neatly in one structure is worth this cost?
I'm not thrilled about this either. If not a separate array, we could change the common code to work on a common struct instead.
To put it another way, this is all copy-n-paste from elsewhere that I don't really understand and don't want to maintain in the driver.
Rob
On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon boris.brezillon@collabora.com wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it.
Any data on performance differences?
The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Why don't we add this when it is useful rather than might be?
Signed-off-by: Boris Brezillon boris.brezillon@collabora.com
On Mon, 16 Sep 2019 17:20:28 -0500 Rob Herring robh@kernel.org wrote:
On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon boris.brezillon@collabora.com wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it.
Any data on performance differences?
Unfortunately no. When I initially added support for BO flags I thought it would fix a regression I had on one glmark2 test (ideas), but the problem ended up being something completely different (overhead of calling ioctl(WAIT_BO) on already idle BOs).
I just ran glmark2 again, and there doesn't seem to be a noticeable improvement with those 2 patches applied (and mesa patched to use the new flags). This being said, the improvement is likely to be workload dependent, so I wouldn't consider these patches as useless, but I'm fine putting them on hold until we see a real need.
Maybe Steven has some real use cases that could help outline the benefit of these patches.
The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Why don't we add this when it is useful rather than might be?
I don't have a need for that one yet, but etanviv has it in place so I thought I'd add both at the same time. Note that it could help us reduce the number of fence returned by panfrost_job_dependency(), but I'm not sure it makes a difference in terms of perfs.
On 17/09/2019 08:15, Boris Brezillon wrote:
On Mon, 16 Sep 2019 17:20:28 -0500 Rob Herring robh@kernel.org wrote:
On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon boris.brezillon@collabora.com wrote:
The READ/WRITE flags are particularly useful if we want to avoid serialization of jobs that read from the same BO but never write to it.
Any data on performance differences?
Unfortunately no. When I initially added support for BO flags I thought it would fix a regression I had on one glmark2 test (ideas), but the problem ended up being something completely different (overhead of calling ioctl(WAIT_BO) on already idle BOs).
I just ran glmark2 again, and there doesn't seem to be a noticeable improvement with those 2 patches applied (and mesa patched to use the new flags). This being said, the improvement is likely to be workload dependent, so I wouldn't consider these patches as useless, but I'm fine putting them on hold until we see a real need.
Maybe Steven has some real use cases that could help outline the benefit of these patches.
To be honest I don't really know. The DDK internally does track this read/write information - but then it doesn't involve the kernel in tracking it. I was presuming that Mesa (because it exports the buffer usage to the kernel) would benefit from being able to have multiple readers - for example of a buffer being used as a texture for multiple render passes. It's possible we don't see this benefit because we haven't got the queuing of jobs merged yet?
There might also be some benefits when it comes to interaction with other drivers, but I don't have any concrete examples.
The NO_IMPLICIT_FENCE might be useful when the user knows the BO is shared but jobs are using different portions of the buffer.
Why don't we add this when it is useful rather than might be?
I don't have a need for that one yet, but etanviv has it in place so I thought I'd add both at the same time. Note that it could help us reduce the number of fence returned by panfrost_job_dependency(), but I'm not sure it makes a difference in terms of perfs.
I'm not aware of any reason for NO_IMPLICIT_FENCE. I found it somewhat odd that it is effectively one way (it doesn't wait for fences, but others could be waiting on the fence added by this usage). If we don't have a use yet in Mesa then it's probably best to wait until we know how this is actually going to be used.
There is of course already the option of simply omitting the BO in the job to prevent any dependencies being created :)
Steve
dri-devel@lists.freedesktop.org