This series consists of fixes and cleanups for virtio_gpu_queue_fenced_ctrl_buffer, except for the last patch. The fixes address corner cases that were overlooked. The cleanups make the last patch easier, but they should be worthwhile on their own as well.
The last patch changes the disable_notify mechanism to call virtqueue_kick_prepare only once in virtio_gpu_enable_notify. It should be more efficient than doing that after each command is queued.
There is a follow-up patch that replaces the global disable_notify state by command-level bools to disable notify for individual commands, so that one process cannot affect another process. I can include it in v2 if you want to review it together with this series.
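To give a concrete idea of that direction, here is a hypothetical sketch (none of these names are from the actual follow-up patch; only virtqueue_kick_prepare/virtqueue_notify are the existing virtio APIs):

/* hypothetical sketch: suppress the kick per command instead of globally */
static void sketch_queue_one(struct virtio_gpu_device *vgdev,
                             struct virtio_gpu_vbuffer *vbuf,
                             bool notify_now)
{
        /* ... queue vbuf into vgdev->ctrlq.vq as today ... */
        if (notify_now && virtqueue_kick_prepare(vgdev->ctrlq.vq))
                virtqueue_notify(vgdev->ctrlq.vq);
        /* a caller batching N commands would pass notify_now == false for
         * the first N-1 commands and true for the last one, so batching in
         * one process does not defer notifications for another
         */
}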
outcnt may be greater than 1 since commit e1218b8c0cc1 ("drm/virtio: Use vmalloc for command buffer allocations."), so waiting for vq->num_free >= 3 is no longer enough. Wait for vq->num_free >= 2 + outcnt instead, matching the space check right above the wait.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Cc: David Riley <davidriley@chromium.org>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 5914e79d3429f..5169e8dd9fd6c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -391,7 +391,8 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 	 */
 	if (vq->num_free < 2 + outcnt) {
 		spin_unlock(&vgdev->ctrlq.qlock);
-		wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= 3);
+		wait_event(vgdev->ctrlq.ack_queue,
+			   vq->num_free >= 2 + outcnt);
 		goto again;
 	}
The handling of the virtqueue_add_sgs ENOSPC error is incorrect: dropping the lock and retrying can result in out-of-order virtqueue_add_sgs calls and break fences. We never get ENOSPC anyway, because the caller waits until there is enough space (the other caller, which did not wait, was removed in commit 32d6c2c5b522). Remove the incorrect and unnecessary error path.
This also adds a WARN_ON(ret) until we properly handle errors.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Cc: David Riley <davidriley@chromium.org>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 5169e8dd9fd6c..63d2df7fb0c98 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -310,8 +310,6 @@ static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
 static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
                                                 struct virtio_gpu_vbuffer *vbuf,
                                                 struct scatterlist *vout)
-                __releases(&vgdev->ctrlq.qlock)
-                __acquires(&vgdev->ctrlq.qlock)
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
         struct scatterlist *sgs[3], vcmd, vresp;
@@ -337,19 +335,14 @@ static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
                 incnt++;
         }
 
-retry:
         ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
-        if (ret == -ENOSPC) {
-                spin_unlock(&vgdev->ctrlq.qlock);
-                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= outcnt + incnt);
-                spin_lock(&vgdev->ctrlq.qlock);
-                goto retry;
-        } else {
-                trace_virtio_gpu_cmd_queue(vq,
-                        (struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+        WARN_ON(ret);
+
+        trace_virtio_gpu_cmd_queue(vq,
+                        (struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+
+        notify = virtqueue_kick_prepare(vq);
 
-                notify = virtqueue_kick_prepare(vq);
-        }
         return notify;
 }
Add virtio_gpu_vbuf_ctrl_hdr, a helper to return the virtio_gpu_ctrl_hdr in a vbuf.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 63d2df7fb0c98..312fd8a039a1e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -95,7 +95,8 @@ virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev,
         if (!vbuf)
                 return ERR_PTR(-ENOMEM);
 
-        BUG_ON(size > MAX_INLINE_CMD_SIZE);
+        BUG_ON(size > MAX_INLINE_CMD_SIZE ||
+               size < sizeof(struct virtio_gpu_ctrl_hdr));
         vbuf->buf = (void *)vbuf + sizeof(*vbuf);
         vbuf->size = size;
 
@@ -109,6 +110,16 @@ virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev,
         return vbuf;
 }
 
+static struct virtio_gpu_ctrl_hdr *
+virtio_gpu_vbuf_ctrl_hdr(struct virtio_gpu_vbuffer *vbuf)
+{
+        /* this assumes a vbuf contains a command that starts with a
+         * virtio_gpu_ctrl_hdr, which is true for both ctrl and cursor
+         * virtqueues.
+         */
+        return (struct virtio_gpu_ctrl_hdr *)vbuf->buf;
+}
+
 static void *virtio_gpu_alloc_cmd(struct virtio_gpu_device *vgdev,
                                   struct virtio_gpu_vbuffer **vbuffer_p,
                                   int size)
@@ -211,7 +222,7 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
                 if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) {
                         if (resp->type >= cpu_to_le32(VIRTIO_GPU_RESP_ERR_UNSPEC)) {
                                 struct virtio_gpu_ctrl_hdr *cmd;
-                                cmd = (struct virtio_gpu_ctrl_hdr *)entry->buf;
+                                cmd = virtio_gpu_vbuf_ctrl_hdr(entry);
                                 DRM_ERROR("response 0x%x (command 0x%x)\n",
                                           le32_to_cpu(resp->type),
                                           le32_to_cpu(cmd->type));
@@ -338,8 +349,7 @@ static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
         ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
         WARN_ON(ret);
 
-        trace_virtio_gpu_cmd_queue(vq,
-                        (struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+        trace_virtio_gpu_cmd_queue(vq, virtio_gpu_vbuf_ctrl_hdr(vbuf));
 
         notify = virtqueue_kick_prepare(vq);
 
@@ -458,7 +468,7 @@ static void virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
                 goto retry;
         } else {
                 trace_virtio_gpu_cmd_queue(vq,
-                        (struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+                        virtio_gpu_vbuf_ctrl_hdr(vbuf));
 
                 notify = virtqueue_kick_prepare(vq);
         }
Remove the hdr parameter of virtio_gpu_queue_fenced_ctrl_buffer; we can get the ctrl hdr from the vbuf instead.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 312fd8a039a1e..5815c7d50dc20 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -358,7 +358,6 @@ static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 
 static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                                                 struct virtio_gpu_vbuffer *vbuf,
-                                                struct virtio_gpu_ctrl_hdr *hdr,
                                                 struct virtio_gpu_fence *fence)
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
@@ -399,8 +398,9 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                 goto again;
         }
 
-        if (hdr && fence) {
-                virtio_gpu_fence_emit(vgdev, hdr, fence);
+        if (fence) {
+                virtio_gpu_fence_emit(vgdev, virtio_gpu_vbuf_ctrl_hdr(vbuf),
+                                      fence);
                 if (vbuf->objs) {
                         virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
                         virtio_gpu_array_unlock_resv(vbuf->objs);
@@ -439,7 +439,7 @@ static void virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
                                          struct virtio_gpu_vbuffer *vbuf)
 {
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL);
 }
 
 static void virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
@@ -503,7 +503,7 @@ void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev,
         cmd_p->width = cpu_to_le32(params->width);
         cmd_p->height = cpu_to_le32(params->height);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
         bo->created = true;
 }
 
@@ -535,7 +535,7 @@ static void virtio_gpu_cmd_resource_inval_backing(struct virtio_gpu_device *vgdev,
         cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING);
         cmd_p->resource_id = cpu_to_le32(resource_id);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev,
@@ -610,7 +610,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
         cmd_p->r.x = cpu_to_le32(x);
         cmd_p->r.y = cpu_to_le32(y);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 static void
@@ -633,7 +633,7 @@ virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev,
         vbuf->data_buf = ents;
         vbuf->data_size = sizeof(*ents) * nents;
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev,
@@ -992,7 +992,7 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev,
         cmd_p->nr_samples = cpu_to_le32(params->nr_samples);
         cmd_p->flags = cpu_to_le32(params->flags);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
         bo->created = true;
 }
 
@@ -1025,7 +1025,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev,
         cmd_p->offset = cpu_to_le64(offset);
         cmd_p->level = cpu_to_le32(level);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
@@ -1051,7 +1051,7 @@ void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
         cmd_p->offset = cpu_to_le64(offset);
         cmd_p->level = cpu_to_le32(level);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
@@ -1074,7 +1074,7 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
         cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id);
         cmd_p->size = cpu_to_le32(data_size);
 
-        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
 
 int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
We don't propagate errors to the callers, so we have to unlock the object arrays ourselves on errors.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 5815c7d50dc20..1e27f4c09341e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -370,8 +370,11 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                 if (is_vmalloc_addr(vbuf->data_buf)) {
                         sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
                                              &outcnt);
-                        if (!sgt)
+                        if (!sgt) {
+                                if (fence && vbuf->objs)
+                                        virtio_gpu_array_unlock_resv(vbuf->objs);
                                 return;
+                        }
                         vout = sgt->sgl;
                 } else {
                         sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
sgs setup does not need to be in the critical section.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 66 +++++++++++++++--------------
 1 file changed, 35 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 1e27f4c09341e..6ccb2a54dfb3c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -318,34 +318,19 @@ static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
         return sgt;
 }
 
-static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-                                                struct virtio_gpu_vbuffer *vbuf,
-                                                struct scatterlist *vout)
+static bool virtio_gpu_queue_ctrl_sgs_locked(struct virtio_gpu_device *vgdev,
+                                             struct virtio_gpu_vbuffer *vbuf,
+                                             struct scatterlist **sgs,
+                                             int outcnt,
+                                             int incnt)
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
-        struct scatterlist *sgs[3], vcmd, vresp;
-        int outcnt = 0, incnt = 0;
         bool notify = false;
         int ret;
 
         if (!vgdev->vqs_ready)
                 return notify;
 
-        sg_init_one(&vcmd, vbuf->buf, vbuf->size);
-        sgs[outcnt + incnt] = &vcmd;
-        outcnt++;
-
-        if (vout) {
-                sgs[outcnt + incnt] = vout;
-                outcnt++;
-        }
-
-        if (vbuf->resp_size) {
-                sg_init_one(&vresp, vbuf->resp_buf, vbuf->resp_size);
-                sgs[outcnt + incnt] = &vresp;
-                incnt++;
-        }
-
         ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
         WARN_ON(ret);
 
@@ -361,26 +346,45 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                                                 struct virtio_gpu_fence *fence)
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
-        struct scatterlist *vout = NULL, sg;
+        struct scatterlist *sgs[3], vcmd, vout, vresp;
         struct sg_table *sgt = NULL;
+        int elemcnt = 0, outcnt = 0, incnt = 0;
         bool notify;
-        int outcnt = 0;
 
+        /* set up vcmd */
+        sg_init_one(&vcmd, vbuf->buf, vbuf->size);
+        elemcnt++;
+        sgs[outcnt] = &vcmd;
+        outcnt++;
+
+        /* set up vout */
         if (vbuf->data_size) {
                 if (is_vmalloc_addr(vbuf->data_buf)) {
+                        int sg_ents;
                         sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
-                                             &outcnt);
+                                             &sg_ents);
                         if (!sgt) {
                                 if (fence && vbuf->objs)
                                         virtio_gpu_array_unlock_resv(vbuf->objs);
                                 return;
                         }
-                        vout = sgt->sgl;
+
+                        elemcnt += sg_ents;
+                        sgs[outcnt] = sgt->sgl;
                 } else {
-                        sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
-                        vout = &sg;
-                        outcnt = 1;
+                        sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
+                        elemcnt++;
+                        sgs[outcnt] = &vout;
                 }
+                outcnt++;
+        }
+
+        /* set up vresp */
+        if (vbuf->resp_size) {
+                sg_init_one(&vresp, vbuf->resp_buf, vbuf->resp_size);
+                elemcnt++;
+                sgs[outcnt + incnt] = &vresp;
+                incnt++;
         }
 
 again:
@@ -394,10 +398,9 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
          * to wait for free space, which can result in fence ids being
          * submitted out-of-order.
          */
-        if (vq->num_free < 2 + outcnt) {
+        if (vq->num_free < elemcnt) {
                 spin_unlock(&vgdev->ctrlq.qlock);
-                wait_event(vgdev->ctrlq.ack_queue,
-                           vq->num_free >= 2 + outcnt);
+                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
                 goto again;
         }
 
@@ -409,7 +412,8 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                         virtio_gpu_array_unlock_resv(vbuf->objs);
                 }
         }
-        notify = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf, vout);
+        notify = virtio_gpu_queue_ctrl_sgs_locked(vgdev, vbuf, sgs, outcnt,
+                                                  incnt);
         spin_unlock(&vgdev->ctrlq.qlock);
         if (notify) {
                 if (vgdev->disable_notify)
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 70 ++++++++++++++---------------
 1 file changed, 35 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 6ccb2a54dfb3c..299470aac281a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -318,18 +318,43 @@ static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
         return sgt;
 }
 
-static bool virtio_gpu_queue_ctrl_sgs_locked(struct virtio_gpu_device *vgdev,
-                                             struct virtio_gpu_vbuffer *vbuf,
-                                             struct scatterlist **sgs,
-                                             int outcnt,
-                                             int incnt)
+static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
+                                      struct virtio_gpu_vbuffer *vbuf,
+                                      struct virtio_gpu_fence *fence,
+                                      int elemcnt,
+                                      struct scatterlist **sgs,
+                                      int outcnt,
+                                      int incnt)
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
         bool notify = false;
         int ret;
 
-        if (!vgdev->vqs_ready)
+again:
+        spin_lock(&vgdev->ctrlq.qlock);
+
+        if (vq->num_free < elemcnt) {
+                spin_unlock(&vgdev->ctrlq.qlock);
+                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
+                goto again;
+        }
+
+        /* now that the position of the vbuf in the virtqueue is known, we can
+         * finally set the fence id
+         */
+        if (fence) {
+                virtio_gpu_fence_emit(vgdev, virtio_gpu_vbuf_ctrl_hdr(vbuf),
+                                      fence);
+                if (vbuf->objs) {
+                        virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
+                        virtio_gpu_array_unlock_resv(vbuf->objs);
+                }
+        }
+
+        if (!vgdev->vqs_ready) {
+                spin_unlock(&vgdev->ctrlq.qlock);
                 return notify;
+        }
 
         ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
         WARN_ON(ret);
@@ -338,6 +363,8 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 
         notify = virtqueue_kick_prepare(vq);
 
+        spin_unlock(&vgdev->ctrlq.qlock);
+
         return notify;
 }
 
@@ -345,7 +372,6 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                                                 struct virtio_gpu_vbuffer *vbuf,
                                                 struct virtio_gpu_fence *fence)
 {
-        struct virtqueue *vq = vgdev->ctrlq.vq;
         struct scatterlist *sgs[3], vcmd, vout, vresp;
         struct sg_table *sgt = NULL;
         int elemcnt = 0, outcnt = 0, incnt = 0;
@@ -387,34 +413,8 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                 incnt++;
         }
 
-again:
-        spin_lock(&vgdev->ctrlq.qlock);
-
-        /*
-         * Make sure we have enouth space in the virtqueue.  If not
-         * wait here until we have.
-         *
-         * Without that virtio_gpu_queue_ctrl_buffer_nolock might have
-         * to wait for free space, which can result in fence ids being
-         * submitted out-of-order.
-         */
-        if (vq->num_free < elemcnt) {
-                spin_unlock(&vgdev->ctrlq.qlock);
-                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
-                goto again;
-        }
-
-        if (fence) {
-                virtio_gpu_fence_emit(vgdev, virtio_gpu_vbuf_ctrl_hdr(vbuf),
-                                      fence);
-                if (vbuf->objs) {
-                        virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
-                        virtio_gpu_array_unlock_resv(vbuf->objs);
-                }
-        }
-        notify = virtio_gpu_queue_ctrl_sgs_locked(vgdev, vbuf, sgs, outcnt,
-                                                  incnt);
-        spin_unlock(&vgdev->ctrlq.qlock);
+        notify = virtio_gpu_queue_ctrl_sgs(vgdev, vbuf, fence, elemcnt, sgs,
+                                           outcnt, incnt);
         if (notify) {
                 if (vgdev->disable_notify)
                         vgdev->pending_notify = true;
When vqs_ready is false, vq should be considered invalid and we should not check vq->num_free. After this change, a fenced command queued before the vqs are ready will have fence id 0 and will be considered done.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 299470aac281a..0bf82cff8da37 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -333,6 +333,14 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 again:
         spin_lock(&vgdev->ctrlq.qlock);
 
+        if (!vgdev->vqs_ready) {
+                spin_unlock(&vgdev->ctrlq.qlock);
+
+                if (fence && vbuf->objs)
+                        virtio_gpu_array_unlock_resv(vbuf->objs);
+                return notify;
+        }
+
         if (vq->num_free < elemcnt) {
                 spin_unlock(&vgdev->ctrlq.qlock);
                 wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
@@ -351,11 +359,6 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
                 }
         }
 
-        if (!vgdev->vqs_ready) {
-                spin_unlock(&vgdev->ctrlq.qlock);
-                return notify;
-        }
-
         ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
         WARN_ON(ret);
Make sure elemcnt does not exceed the maximum element count in virtio_gpu_queue_ctrl_sgs. We should improve our error handling or impose a size limit on execbuffer; both are TODOs.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Cc: David Riley <davidriley@chromium.org>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 1 +
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 3 +++
 drivers/gpu/drm/virtio/virtgpu_kms.c   | 2 ++
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 2 +-
 4 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 7e69c06e168ea..f7520feb39d4b 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -143,6 +143,7 @@ struct virtio_gpu_framebuffer {
 
 struct virtio_gpu_queue {
         struct virtqueue *vq;
+        unsigned int max_free;
         spinlock_t qlock;
         wait_queue_head_t ack_queue;
         struct work_struct dequeue_work;
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 205ec4abae2b9..0954f61d2000f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -132,6 +132,9 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
                 goto out_unused_fd;
         }
 
+        /* XXX virtio_gpu_cmd_submit may fail silently when exbuf->size is
+         * huge
+         */
         buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
         if (IS_ERR(buf)) {
                 ret = PTR_ERR(buf);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 2f5773e43557c..e7d5840e432dc 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -170,7 +170,9 @@ int virtio_gpu_init(struct drm_device *dev)
                 goto err_vqs;
         }
         vgdev->ctrlq.vq = vqs[0];
+        vgdev->ctrlq.max_free = vqs[0]->num_free;
         vgdev->cursorq.vq = vqs[1];
+        vgdev->cursorq.max_free = vqs[1]->num_free;
         ret = virtio_gpu_alloc_vbufs(vgdev);
         if (ret) {
                 DRM_ERROR("failed to alloc vbufs\n");
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 0bf82cff8da37..725cfe93bcef8 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -333,7 +333,7 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 again:
         spin_lock(&vgdev->ctrlq.qlock);
 
-        if (!vgdev->vqs_ready) {
+        if (unlikely(!vgdev->vqs_ready || elemcnt > vgdev->ctrlq.max_free)) {
                 spin_unlock(&vgdev->ctrlq.qlock);
 
                 if (fence && vbuf->objs)
On Wed, Feb 05, 2020 at 10:19:53AM -0800, Chia-I Wu wrote:
> Make sure elemcnt does not exceed the maximum element count in
> virtio_gpu_queue_ctrl_sgs.  We should improve our error handling or
> impose a size limit on execbuffer; both are TODOs.
Hmm, virtio supports indirect ring entries, so large execbuffers should not be a problem ...
So I've waded through the virtio code. Figured our logic is wrong. Luckily we err on the safe side (waiting for more free entries than we actually need). The patch below should fix that (not tested yet).
cheers,
  Gerd
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index aa25e8781404..535399b3a3ea 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -328,7 +328,7 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 {
         struct virtqueue *vq = vgdev->ctrlq.vq;
         bool notify = false;
-        int ret;
+        int vqcnt, ret;
 
 again:
         spin_lock(&vgdev->ctrlq.qlock);
@@ -341,9 +341,10 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
                 return notify;
         }
 
-        if (vq->num_free < elemcnt) {
+        vqcnt = virtqueue_use_indirect(vq, elemcnt) ? 1 : elemcnt;
+        if (vq->num_free < vqcnt) {
                 spin_unlock(&vgdev->ctrlq.qlock);
-                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
+                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= vqcnt);
                 goto again;
         }
On Thu, Feb 6, 2020 at 1:49 AM Gerd Hoffmann <kraxel@redhat.com> wrote:
> On Wed, Feb 05, 2020 at 10:19:53AM -0800, Chia-I Wu wrote:
> > Make sure elemcnt does not exceed the maximum element count in
> > virtio_gpu_queue_ctrl_sgs.  We should improve our error handling or
> > impose a size limit on execbuffer; both are TODOs.
>
> Hmm, virtio supports indirect ring entries, so large execbuffers should
> not be a problem ...
>
> So I've waded through the virtio code.  Figured our logic is wrong.
> Luckily we err on the safe side (waiting for more free entries than we
> actually need).  The patch below should fix that (not tested yet).
That is good to know! I was not sure if we have VIRTIO_RING_F_INDIRECT_DESC, so I kept our logic. I will drop this patch in v2.
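(For the record, a minimal sketch of such a feature check, using the generic virtio_has_feature() helper; this is not part of any patch in this thread, and the function name is made up:)

#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>

/* sketch: true when the device negotiated indirect descriptors */
static bool virtio_gpu_has_indirect_descs(struct virtio_device *vdev)
{
        return virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
}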
> cheers,
>   Gerd
>
> diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
> index aa25e8781404..535399b3a3ea 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_vq.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
> @@ -328,7 +328,7 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
>  {
>          struct virtqueue *vq = vgdev->ctrlq.vq;
>          bool notify = false;
> -        int ret;
> +        int vqcnt, ret;
>
>  again:
>          spin_lock(&vgdev->ctrlq.qlock);
> @@ -341,9 +341,10 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
>                  return notify;
>          }
>
> -        if (vq->num_free < elemcnt) {
> +        vqcnt = virtqueue_use_indirect(vq, elemcnt) ? 1 : elemcnt;
> +        if (vq->num_free < vqcnt) {
>                  spin_unlock(&vgdev->ctrlq.qlock);
> -                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
> +                wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= vqcnt);
>                  goto again;
>          }
It becomes clear that virtio_gpu_queue_fenced_ctrl_buffer should be responsible for setting up sgs and virtio_gpu_queue_ctrl_sgs should be responsible for queuing sgs.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 725cfe93bcef8..0961475e68105 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -318,7 +318,7 @@ static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
         return sgt;
 }
 
-static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
+static void virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
                                       struct virtio_gpu_vbuffer *vbuf,
                                       struct virtio_gpu_fence *fence,
                                       int elemcnt,
@@ -338,7 +338,7 @@ static void virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 
                 if (fence && vbuf->objs)
                         virtio_gpu_array_unlock_resv(vbuf->objs);
-                return notify;
+                return;
         }
 
         if (vq->num_free < elemcnt) {
@@ -368,7 +368,12 @@ static bool virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 
         spin_unlock(&vgdev->ctrlq.qlock);
 
-        return notify;
+        if (notify) {
+                if (vgdev->disable_notify)
+                        vgdev->pending_notify = true;
+                else
+                        virtqueue_notify(vq);
+        }
 }
 
 static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
@@ -378,7 +383,6 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
         struct scatterlist *sgs[3], vcmd, vout, vresp;
         struct sg_table *sgt = NULL;
         int elemcnt = 0, outcnt = 0, incnt = 0;
-        bool notify;
 
         /* set up vcmd */
         sg_init_one(&vcmd, vbuf->buf, vbuf->size);
@@ -416,14 +420,8 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
                 incnt++;
         }
 
-        notify = virtio_gpu_queue_ctrl_sgs(vgdev, vbuf, fence, elemcnt, sgs,
-                                           outcnt, incnt);
-        if (notify) {
-                if (vgdev->disable_notify)
-                        vgdev->pending_notify = true;
-                else
-                        virtqueue_notify(vgdev->ctrlq.vq);
-        }
+        virtio_gpu_queue_ctrl_sgs(vgdev, vbuf, fence, elemcnt, sgs, outcnt,
+                                  incnt);
 
         if (sgt) {
                 sg_free_table(sgt);
Call virtqueue_kick_prepare only once in virtio_gpu_enable_notify, instead of after every command queued while notification is disabled. This should be more efficient, since the whole point of the mechanism is to batch commands.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h |  1 -
 drivers/gpu/drm/virtio/virtgpu_vq.c  | 28 +++++++++++++++++-----------
 2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index f7520feb39d4b..f0e7130ac9e27 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -179,7 +179,6 @@ struct virtio_gpu_device {
         bool vqs_ready;
 
         bool disable_notify;
-        bool pending_notify;
 
         struct ida resource_ida;
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 0961475e68105..aea1be68e99c4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -364,16 +364,13 @@ static void virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
 
         trace_virtio_gpu_cmd_queue(vq, virtio_gpu_vbuf_ctrl_hdr(vbuf));
 
-        notify = virtqueue_kick_prepare(vq);
+        if (!vgdev->disable_notify)
+                notify = virtqueue_kick_prepare(vq);
 
         spin_unlock(&vgdev->ctrlq.qlock);
 
-        if (notify) {
-                if (vgdev->disable_notify)
-                        vgdev->pending_notify = true;
-                else
-                        virtqueue_notify(vq);
-        }
+        if (notify)
+                virtqueue_notify(vq);
 }
 
 static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
@@ -436,12 +433,21 @@ void virtio_gpu_disable_notify(struct virtio_gpu_device *vgdev)
 
 void virtio_gpu_enable_notify(struct virtio_gpu_device *vgdev)
 {
+        struct virtqueue *vq = vgdev->ctrlq.vq;
+        bool notify;
+
         vgdev->disable_notify = false;
 
-        if (!vgdev->pending_notify)
-                return;
-        vgdev->pending_notify = false;
-        virtqueue_notify(vgdev->ctrlq.vq);
+        spin_lock(&vgdev->ctrlq.qlock);
+        notify = virtqueue_kick_prepare(vq);
+        spin_unlock(&vgdev->ctrlq.qlock);
+
+        /* Do not call virtqueue_notify with the lock held because
+         * virtio_gpu_dequeue_ctrl_func may contend for the lock if an irq is
+         * generated while we are in virtqueue_notify.
+         */
+        if (notify)
+                virtqueue_notify(vq);
 }
 
 static void virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
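(For context, a hedged sketch of how the disable/enable pair is meant to be used around a batch of commands; the flush call and its arguments are abbreviated, not copied from the real plane-update callers:)

        virtio_gpu_disable_notify(vgdev);

        /* queue any number of control commands; kicks are suppressed */
        virtio_gpu_cmd_resource_flush(vgdev, resource_id, 0, 0,
                                      width, height);
        /* ... more commands ... */

        /* one virtqueue_kick_prepare/virtqueue_notify for the whole batch */
        virtio_gpu_enable_notify(vgdev);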
On Wed, Feb 05, 2020 at 10:19:44AM -0800, Chia-I Wu wrote:
> This series consists of fixes and cleanups for
> virtio_gpu_queue_fenced_ctrl_buffer, except for the last patch.  The
> fixes address corner cases that were overlooked.  The cleanups make the
> last patch easier, but they should be worthwhile on their own as well.
Pushed most patches.
> The last patch changes the disable_notify mechanism to call
> virtqueue_kick_prepare only once in virtio_gpu_enable_notify.  It should
> be more efficient than doing that after each command is queued.
>
> There is a follow-up patch that replaces the global disable_notify state
> by command-level bools to disable notify for individual commands, so
> that one process cannot affect another process.  I can include it in v2
> if you want to review it together with this series.
Can you rebase & resend so I can see all notify patches together?
thanks,
  Gerd