The userspace API already had everything needed to handle read vs write synchronization. This patch actually bothers to hook it up properly, so that we don't need to (for example) stall on userspace read access to a buffer that the gpu is also still reading.
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |  2 +-
 drivers/gpu/drm/msm/msm_gem.c | 25 ++++++++++++++++++-------
 drivers/gpu/drm/msm/msm_gem.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c |  9 +++++++--
 4 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 80d7509..1ea9d46 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -153,7 +153,7 @@ void *msm_gem_vaddr(struct drm_gem_object *obj);
 int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
 		struct work_struct *work);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
-		struct msm_gpu *gpu, uint32_t fence);
+		struct msm_gpu *gpu, bool write, uint32_t fence);
 void msm_gem_move_to_inactive(struct drm_gem_object *obj);
 int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
 		struct timespec *timeout);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 6b5a6c8..df0390f 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -393,11 +393,14 @@ int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
 }

 void msm_gem_move_to_active(struct drm_gem_object *obj,
-		struct msm_gpu *gpu, uint32_t fence)
+		struct msm_gpu *gpu, bool write, uint32_t fence)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	msm_obj->gpu = gpu;
-	msm_obj->fence = fence;
+	if (write)
+		msm_obj->write_fence = fence;
+	else
+		msm_obj->read_fence = fence;
 	list_del_init(&msm_obj->mm_list);
 	list_add_tail(&msm_obj->mm_list, &gpu->active_list);
 }
@@ -411,7 +414,8 @@ void msm_gem_move_to_inactive(struct drm_gem_object *obj)
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

 	msm_obj->gpu = NULL;
-	msm_obj->fence = 0;
+	msm_obj->read_fence = 0;
+	msm_obj->write_fence = 0;
 	list_del_init(&msm_obj->mm_list);
 	list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
@@ -433,8 +437,14 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int ret = 0;

-	if (is_active(msm_obj) && !(op & MSM_PREP_NOSYNC))
-		ret = msm_wait_fence_interruptable(dev, msm_obj->fence, timeout);
+	if (is_active(msm_obj) && !(op & MSM_PREP_NOSYNC)) {
+		uint32_t fence = 0;
+		if (op & MSM_PREP_READ)
+			fence = msm_obj->write_fence;
+		if (op & MSM_PREP_WRITE)
+			fence = max(fence, msm_obj->read_fence);
+		ret = msm_wait_fence_interruptable(dev, fence, timeout);
+	}

 	/* TODO cache maintenance */

@@ -455,9 +465,10 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	uint64_t off = drm_vma_node_start(&obj->vma_node);

 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-	seq_printf(m, "%08x: %c(%d) %2d (%2d) %08llx %p %d\n",
+	seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %d\n",
 			msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
-			msm_obj->fence, obj->name, obj->refcount.refcount.counter,
+			msm_obj->read_fence, msm_obj->write_fence,
+			obj->name, obj->refcount.refcount.counter,
 			off, msm_obj->vaddr, obj->size);
 }

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index d746f13..0676f32 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -36,7 +36,7 @@ struct msm_gem_object {
 	 */
 	struct list_head mm_list;
 	struct msm_gpu *gpu;     /* non-null if active */
-	uint32_t fence;
+	uint32_t read_fence, write_fence;

 	/* Transiently in the process of submit ioctl, objects associated
 	 * with the submit are on submit->bo_list.. this only lasts for
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index e1e1ec9..cb9cdff 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -265,7 +265,8 @@ static void retire_worker(struct work_struct *work)
 		obj = list_first_entry(&gpu->active_list,
 				struct msm_gem_object, mm_list);

-		if (obj->fence <= fence) {
+		if ((obj->read_fence <= fence) &&
+				(obj->write_fence <= fence)) {
 			/* move to inactive: */
 			msm_gem_move_to_inactive(&obj->base);
 			msm_gem_put_iova(&obj->base, gpu->id);
@@ -321,7 +322,11 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 					submit->gpu->id, &iova);
 		}

-		msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence);
+		if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
+			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
+
+		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
+			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
 	}

 	hangcheck_timer_reset(gpu);
 	mutex_unlock(&dev->struct_mutex);
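For context, the userspace side of this is the CPU_PREP ioctl. Below is a minimal sketch of a CPU read that, with this patch, only waits on pending gpu writes (fd setup and error handling omitted; bo_handle is a hypothetical GEM handle obtained elsewhere):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include "msm_drm.h"	/* msm uapi header: drm_msm_gem_cpu_prep, MSM_PREP_* */

	/* Ask the kernel to make bo_handle safe for CPU *reads*.  With
	 * this patch that only waits on the buffer's write_fence, so
	 * concurrent gpu reads of the same buffer no longer stall us:
	 */
	static int prep_for_cpu_read(int fd, uint32_t bo_handle)
	{
		struct drm_msm_gem_cpu_prep req = {
			.handle  = bo_handle,
			.op      = MSM_PREP_READ,
			.timeout = { .tv_sec = 1, .tv_nsec = 0 },
		};
		return ioctl(fd, DRM_IOCTL_MSM_GEM_CPU_PREP, &req);
	}

A CPU write would pass MSM_PREP_READ | MSM_PREP_WRITE instead, so that msm_gem_cpu_prep() above picks up both the write_fence and the read_fence before letting the CPU touch the buffer.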
If the gpu locks up with the rptr shortly beyond the wrap-around point in the ringbuffer, then because the rptr is not reset on recovery (while the wptr is, by virtue of resetting rb->cur), we can end up in a scenario where we think there is not enough space in the ringbuffer for the next cmds. And since the CP won't update the rptr until after processing an IB, this leaves things effectively deadlocked.

So reset the rptr too. And spiff up hangcheck a bit to make things easier to debug.
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 +++++++++-
 drivers/gpu/drm/msm/msm_gpu.c           |  9 ++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a605847..a0b9d8a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -124,6 +124,8 @@ void adreno_recover(struct msm_gpu *gpu)

 	/* reset completed fence seqno, just discard anything pending: */
 	adreno_gpu->memptrs->fence = gpu->submitted_fence;
+	adreno_gpu->memptrs->rptr = 0;
+	adreno_gpu->memptrs->wptr = 0;

 	gpu->funcs->pm_resume(gpu);
 	ret = gpu->funcs->hw_init(gpu);
@@ -229,7 +231,7 @@ void adreno_idle(struct msm_gpu *gpu)
 			return;
 	} while(time_before(jiffies, t));

-	DRM_ERROR("timeout waiting for %s to drain ringbuffer!\n", gpu->name);
+	DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);

 	/* TODO maybe we need to reset GPU here to recover from hang? */
 }
@@ -256,11 +258,17 @@ void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	uint32_t freedwords;
+	unsigned long t = jiffies + ADRENO_IDLE_TIMEOUT;
 	do {
 		uint32_t size = gpu->rb->size / 4;
 		uint32_t wptr = get_wptr(gpu->rb);
 		uint32_t rptr = adreno_gpu->memptrs->rptr;
 		freedwords = (rptr + (size - 1) - wptr) % size;
+
+		if (time_after(jiffies, t)) {
+			DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
+			break;
+		}
 	} while(freedwords < ndwords);
 }

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index cb9cdff..10cc443 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -237,8 +237,15 @@ static void hangcheck_handler(unsigned long data)
 		gpu->hangcheck_fence = fence;
 	} else if (fence < gpu->submitted_fence) {
 		/* no progress and not done.. hung! */
-		struct msm_drm_private *priv = gpu->dev->dev_private;
+		struct drm_device *dev = gpu->dev;
+		struct msm_drm_private *priv = dev->dev_private;
 		gpu->hangcheck_fence = fence;
+		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
+				gpu->name);
+		dev_err(dev->dev, "%s: completed fence: %u\n",
+				gpu->name, fence);
+		dev_err(dev->dev, "%s: submitted fence: %u\n",
+				gpu->name, gpu->submitted_fence);
 		queue_work(priv->wq, &gpu->recover_work);
 	}
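To make the failure mode concrete, here is the free-space formula from adreno_wait_ring() applied to illustrative numbers (a standalone sketch; the ring size and pointer values are made up):

	#include <stdint.h>
	#include <stdio.h>

	/* Free-space formula from adreno_wait_ring(), in dwords: */
	static uint32_t freedwords(uint32_t rptr, uint32_t wptr, uint32_t size)
	{
		return (rptr + (size - 1) - wptr) % size;
	}

	int main(void)
	{
		uint32_t size = 0x8000;	/* example ring size, in dwords */

		/* Hang just past the wrap: recovery resets wptr to 0 (via
		 * rb->cur), but before this patch the stale rptr survives:
		 */
		printf("stale rptr: %u dwords free\n", freedwords(0x10, 0, size));
		/* -> 15: the (actually empty) ring looks nearly full, and
		 * since the CP only updates rptr after processing an IB,
		 * it stays that way -- adreno_wait_ring() spins forever. */

		printf("reset rptr: %u dwords free\n", freedwords(0, 0, size));
		/* -> 32767: with rptr also reset, the whole ring is free. */

		return 0;
	}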