This feature enables the Guest to wait until a flush has been performed on a buffer it has submitted to the Host.
Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 include/uapi/linux/virtio_gpu.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 97523a95781d..8fe58657a473 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -60,6 +60,11 @@
  */
 #define VIRTIO_GPU_F_RESOURCE_BLOB 3
 
+/*
+ * VIRTIO_GPU_CMD_WAIT_FLUSH
+ */
+#define VIRTIO_GPU_F_EXPLICIT_FLUSH 4
+
 enum virtio_gpu_ctrl_type {
         VIRTIO_GPU_UNDEFINED = 0,
 
@@ -78,6 +83,7 @@ enum virtio_gpu_ctrl_type {
         VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
         VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB,
         VIRTIO_GPU_CMD_SET_SCANOUT_BLOB,
+        VIRTIO_GPU_CMD_WAIT_FLUSH,
 
         /* 3d commands */
         VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -441,4 +447,10 @@ struct virtio_gpu_resource_unmap_blob {
         __le32 padding;
 };
 
+/* VIRTIO_GPU_CMD_WAIT_FLUSH */
+struct virtio_gpu_wait_flush {
+        struct virtio_gpu_ctrl_hdr hdr;
+        __le32 resource_id;
+};
+
 #endif
This implements the hypercall interface for the wait_flush command.
Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h |  4 ++++
 drivers/gpu/drm/virtio/virtgpu_vq.c  | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index d9dbc4f258f3..f77d196ccc8f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -403,6 +403,10 @@ virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device *vgdev,
                                 struct drm_framebuffer *fb,
                                 uint32_t width, uint32_t height,
                                 uint32_t x, uint32_t y);
+void virtio_gpu_cmd_wait_flush(struct virtio_gpu_device *vgdev,
+                               struct virtio_gpu_object_array *objs,
+                               struct virtio_gpu_fence *fence);
+
 
 /* virtgpu_display.c */
 int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index cf84d382dd41..0042a143e71e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -1307,3 +1307,20 @@ void virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device *vgdev,
 
         virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
 }
+
+void virtio_gpu_cmd_wait_flush(struct virtio_gpu_device *vgdev,
+                               struct virtio_gpu_object_array *objs,
+                               struct virtio_gpu_fence *fence)
+{
+        struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]);
+        struct virtio_gpu_wait_flush *cmd_p;
+        struct virtio_gpu_vbuffer *vbuf;
+
+        cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+        memset(cmd_p, 0, sizeof(*cmd_p));
+        vbuf->objs = objs;
+
+        cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_WAIT_FLUSH);
+        cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle);
+        virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
+}
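For reference, the expected calling sequence is: pin the BO in an object array, queue the command with a fence attached, kick the control queue, and then wait on the fence. A minimal sketch of a caller (mirroring what the plane-update patch later in this series does; error handling omitted, and the single-argument virtio_gpu_fence_alloc() signature is the one this kernel uses):

        struct virtio_gpu_object_array *objs;
        struct virtio_gpu_fence *fence;

        fence = virtio_gpu_fence_alloc(vgdev);
        objs = virtio_gpu_array_alloc(1);
        virtio_gpu_array_add_obj(objs, obj);
        virtio_gpu_array_lock_resv(objs);

        /* queue VIRTIO_GPU_CMD_WAIT_FLUSH and kick the ctrl virtqueue */
        virtio_gpu_cmd_wait_flush(vgdev, objs, fence);
        virtio_gpu_notify(vgdev);

        /* block until the host completes the command, then drop the fence */
        dma_fence_wait(&fence->f, true);
        dma_fence_put(&fence->f);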
If this feature is available, a fence will be associated with the scanout buffer and a dma_fence_wait will be performed as part of plane update.
Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/gpu/drm/virtio/virtgpu_debugfs.c |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.c     |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.h     |  1 +
 drivers/gpu/drm/virtio/virtgpu_kms.c     |  9 ++++-
 drivers/gpu/drm/virtio/virtgpu_plane.c   | 51 ++++++++++++++++++++----
 5 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
index c2b20e0ee030..4c258299752b 100644
--- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c
+++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
@@ -52,6 +52,7 @@ static int virtio_gpu_features(struct seq_file *m, void *data)
                             vgdev->has_resource_assign_uuid);
 
         virtio_gpu_add_bool(m, "blob resources", vgdev->has_resource_blob);
+        virtio_gpu_add_bool(m, "explicit flush", vgdev->has_explicit_flush);
         virtio_gpu_add_int(m, "cap sets", vgdev->num_capsets);
         virtio_gpu_add_int(m, "scanouts", vgdev->num_scanouts);
         if (vgdev->host_visible_region.len) {
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index a21dc3ad6f88..b003523e876e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -166,6 +166,7 @@ static unsigned int features[] = {
         VIRTIO_GPU_F_EDID,
         VIRTIO_GPU_F_RESOURCE_UUID,
         VIRTIO_GPU_F_RESOURCE_BLOB,
+        VIRTIO_GPU_F_EXPLICIT_FLUSH,
 };
 static struct virtio_driver virtio_gpu_driver = {
         .feature_table = features,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index f77d196ccc8f..72552618d3c3 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -233,6 +233,7 @@ struct virtio_gpu_device {
         bool has_resource_assign_uuid;
         bool has_resource_blob;
         bool has_host_visible;
+        bool has_explicit_flush;
         struct virtio_shm_region host_visible_region;
         struct drm_mm host_visible_mm;
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index b375394193be..d6ff9cb8f97e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -156,6 +156,9 @@ int virtio_gpu_init(struct drm_device *dev)
         if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) {
                 vgdev->has_resource_blob = true;
         }
+        if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_EXPLICIT_FLUSH)) {
+                vgdev->has_explicit_flush = true;
+        }
         if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region,
                                   VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) {
                 if (!devm_request_mem_region(&vgdev->vdev->dev,
@@ -176,11 +179,13 @@ int virtio_gpu_init(struct drm_device *dev)
                          (unsigned long)vgdev->host_visible_region.len);
         }
 
- DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible\n", + DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible\ + %cexplicit_flush\n", vgdev->has_virgl_3d ? '+' : '-', vgdev->has_edid ? '+' : '-', vgdev->has_resource_blob ? '+' : '-', - vgdev->has_host_visible ? '+' : '-'); + vgdev->has_host_visible ? '+' : '-', + vgdev->has_explicit_flush ? '+' : '-');
 
         ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
         if (ret) {
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 4e1b17548007..9ae2ec2e9da7 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -129,6 +129,30 @@ static void virtio_gpu_update_dumb_bo(struct virtio_gpu_device *vgdev,
                                   objs, NULL);
 }
 
+static void virtio_gpu_fb_wait_flush(struct drm_plane *plane)
+{
+        struct drm_device *dev = plane->dev;
+        struct virtio_gpu_device *vgdev = dev->dev_private;
+        struct virtio_gpu_framebuffer *vgfb;
+
+        vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
+        if (vgfb && vgfb->fence) {
+                struct virtio_gpu_object_array *objs;
+
+                objs = virtio_gpu_array_alloc(1);
+                if (!objs)
+                        return;
+                virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]);
+                virtio_gpu_array_lock_resv(objs);
+                virtio_gpu_cmd_wait_flush(vgdev, objs, vgfb->fence);
+                virtio_gpu_notify(vgdev);
+
+                dma_fence_wait(&vgfb->fence->f, true);
+                dma_fence_put(&vgfb->fence->f);
+                vgfb->fence = NULL;
+        }
+}
+
 static void virtio_gpu_primary_plane_update(struct drm_plane *plane,
                                             struct drm_atomic_state *state)
 {
@@ -204,17 +228,22 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane,
                                   rect.x2 - rect.x1,
                                   rect.y2 - rect.y1);
         virtio_gpu_notify(vgdev);
+
+        if (vgdev->has_explicit_flush && bo->guest_blob)
+                virtio_gpu_fb_wait_flush(plane);
 }
 
-static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane,
-                                        struct drm_plane_state *new_state)
+static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane,
+                                       struct drm_plane_state *new_state)
 {
         struct drm_device *dev = plane->dev;
         struct virtio_gpu_device *vgdev = dev->dev_private;
         struct virtio_gpu_framebuffer *vgfb;
         struct virtio_gpu_object *bo;
 
-        if (!new_state->fb)
+        if (!new_state->fb ||
+            (plane->type == DRM_PLANE_TYPE_PRIMARY &&
+             !vgdev->has_explicit_flush))
                 return 0;
 
         vgfb = to_virtio_gpu_framebuffer(new_state->fb);
@@ -228,12 +257,16 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane,
         return 0;
 }
 
-static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane,
-                                         struct drm_plane_state *old_state)
+static void virtio_gpu_plane_cleanup_fb(struct drm_plane *plane,
+                                        struct drm_plane_state *old_state)
 {
+        struct drm_device *dev = plane->dev;
+        struct virtio_gpu_device *vgdev = dev->dev_private;
         struct virtio_gpu_framebuffer *vgfb;
 
-        if (!plane->state->fb)
+        if (!plane->state->fb ||
+            (plane->type == DRM_PLANE_TYPE_PRIMARY &&
+             !vgdev->has_explicit_flush))
                 return;
 
         vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
@@ -321,13 +354,15 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs virtio_gpu_primary_helper_funcs = {
+        .prepare_fb             = virtio_gpu_plane_prepare_fb,
+        .cleanup_fb             = virtio_gpu_plane_cleanup_fb,
         .atomic_check           = virtio_gpu_plane_atomic_check,
         .atomic_update          = virtio_gpu_primary_plane_update,
 };
 
 static const struct drm_plane_helper_funcs virtio_gpu_cursor_helper_funcs = {
-        .prepare_fb             = virtio_gpu_cursor_prepare_fb,
-        .cleanup_fb             = virtio_gpu_cursor_cleanup_fb,
+        .prepare_fb             = virtio_gpu_plane_prepare_fb,
+        .cleanup_fb             = virtio_gpu_plane_cleanup_fb,
         .atomic_check           = virtio_gpu_plane_atomic_check,
         .atomic_update          = virtio_gpu_cursor_plane_update,
 };
On Tue, May 11, 2021 at 01:36:08AM -0700, Vivek Kasireddy wrote:
This feature enables the Guest to wait until a flush has been performed on a buffer it has submitted to the Host.
This needs a virtio-spec update documenting the new feature.
+        VIRTIO_GPU_CMD_WAIT_FLUSH,
Why a new command?
If I understand it correctly you want to wait until VIRTIO_GPU_CMD_RESOURCE_FLUSH is done. We could extend the VIRTIO_GPU_CMD_RESOURCE_FLUSH command for that instead.
take care, Gerd
Hi Gerd,
On Tue, May 11, 2021 at 01:36:08AM -0700, Vivek Kasireddy wrote:
This feature enables the Guest to wait until a flush has been performed on a buffer it has submitted to the Host.
This needs a virtio-spec update documenting the new feature.
[Kasireddy, Vivek] Yes, I was planning to do that after getting your thoughts on this feature.
+        VIRTIO_GPU_CMD_WAIT_FLUSH,
Why a new command?
If I understand it correctly you want to wait until VIRTIO_GPU_CMD_RESOURCE_FLUSH is done. We could extend the VIRTIO_GPU_CMD_RESOURCE_FLUSH command for that instead.
[Kasireddy, Vivek] VIRTIO_GPU_CMD_RESOURCE_FLUSH can trigger/queue a redraw that may be performed synchronously or asynchronously depending on the UI (GlArea is async and gtk-egl is sync but can be made async). I'd like to make the Guest wait until the actual redraw happens (until glFlush or eglSwapBuffers, again depending on the UI).
However, as part of this feature (explicit flush), I'd like to make the Guest wait until the current resource (as specified by resource_flush or set_scanout) is flushed or synchronized. But for a different feature I am thinking of (explicit sync), I'd like to make the Guest wait for the previous buffer/resource submitted (available via old_state->fb).
I think it may be possible to accomplish both features by overloading resource_flush but given the various combinations of Guests (Android/Chrome OS, Windows, Linux) and Hosts (Android/Chrome OS, Linux) that are or will be supported with virtio-gpu + i915, I figured adding a new command might be cleaner.
Thanks, Vivek
take care, Gerd
Hi,
However, as part of this feature (explicit flush), I'd like to make the Guest wait until the current resource (as specified by resource_flush or set_scanout) is flushed or synchronized. But for a different feature I am thinking of (explicit sync), I'd like to make the Guest wait for the previous buffer/resource submitted (available via old_state->fb).
For page-flipping I guess? i.e. you want to submit a new framebuffer, then wait until the host doesn't need the previous one? That is likewise linked to a command, although it is set_scanout this time.
So, right now qemu simply queues the request and completes the command when a guest sends a resource_flush or set_scanout command. You want to be notified when the host is actually done processing the request.
I still think it makes sense to extend the resource_flush and set_scanout commands for that, for example by adding a flag to the flags field in the request header. That way it is clear what exactly you are waiting for. You can also attach a fence to the request which you can later wait on.
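For illustration, the guest side of that could look roughly like this; the VIRTIO_GPU_FLAG_WAIT_DONE name and bit are hypothetical (only VIRTIO_GPU_FLAG_FENCE exists in the header today):

#define VIRTIO_GPU_FLAG_FENCE           (1 << 0)        /* existing */
#define VIRTIO_GPU_FLAG_WAIT_DONE       (1 << 1)        /* hypothetical */

        /* when queueing resource_flush/set_scanout with a fence attached,
         * ask the host to complete the command only once it is actually
         * done processing the request: */
        cmd_p->hdr.flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE |
                                        VIRTIO_GPU_FLAG_WAIT_DONE);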
take care, Gerd
Hi Gerd,
However, as part of this feature (explicit flush), I'd like to make the Guest wait until the current resource (as specified by resource_flush or set_scanout) is flushed or synchronized. But for a different feature I am thinking of (explicit sync), I'd like to make the Guest wait for the previous buffer/resource submitted (available via old_state->fb).
For page-flipping I guess? i.e. you want to submit a new framebuffer, then wait until the host doesn't need the previous one? That is likewise linked to a command, although it is set_scanout this time.
[Kasireddy, Vivek] Mainly for page-flipping, but I'd also like fbcon and Xorg, which do frontbuffer rendering/updates, to work seamlessly as well.
So, right now qemu simply queues the request and completes the command when a guest sends a resource_flush or set_scanout command. You want to be notified when the host is actually done processing the request.
[Kasireddy, Vivek] Correct, that is exactly what I want -- make the Guest wait until it gets notified that the Host is completely done processing/using the fb. However, there can be two resources the guest can be made to wait on: wait for the new/current fb that is being submitted to be processed (explicit flush) or wait for the previous fb that was submitted earlier (in the previous repaint cycle) to be processed (explicit sync).
IIUC, Explicit sync only makes sense if 1) the Host windowing system also supports that feature/protocol (currently only upstream Weston does but I'd like to add it to Mutter if no one else does) or if there is a way to figure out (dma-buf sync file?) if the Host has completely processed the fb and 2) if Qemu UI is not doing a blit and instead submitting the guest fb/dmabuf directly to the Host windowing system. As you are aware, 2) can possibly be done with dbus/pipewire Qemu UI backends (I'll explore this soon) but not with GTK or SDL.
Ideally, I'd like to have Explicit sync but until 2) above happens, Explicit flush can be a reasonable alternative given the blit that happens with the GTK/SDL backends. By making the Guest wait until the UI has submitted the buffer to the Host windowing system, we can theoretically tie the Guest repaint frequency to the Host vblank and thus have Guest apps render at 60 FPS by default instead of the 90 FPS I see without explicit flush. This feature would also help Windows guests that can only do frontbuffer rendering with Blobs.
I still think it makes sense to extend the resource_flush and set_scanout commands for that, for example by adding a flag to the flags field in the request header. That way it is clear what exactly you are waiting for. You can also attach a fence to the request which you can later wait on.
[Kasireddy, Vivek] Yeah, I am reluctant to add a new cmd as well but I want it to work for all Guests and Hosts. Anyway, let me try your idea of adding specific flags and see if it works.
Thanks, Vivek
take care, Gerd
On Wed, May 12, 2021 at 09:18:37PM +0000, Kasireddy, Vivek wrote:
Hi Gerd,
However, as part of this feature (explicit flush), I'd like to make the Guest wait until the current resource (as specified by resource_flush or set_scanout) is flushed or synchronized. But for a different feature I am thinking of (explicit sync), I'd like to make the Guest wait for the previous buffer/resource submitted (available via old_state->fb).
For page-flipping I guess? i.e. you want to submit a new framebuffer, then wait until the host doesn't need the previous one? That is likewise linked to a command, although it is set_scanout this time.
[Kasireddy, Vivek] Mainly for page-flipping, but I'd also like fbcon and Xorg, which do frontbuffer rendering/updates, to work seamlessly as well.
So, right now qemu simply queues the request and completes the command when a guest sends a resource_flush or set_scanout command. You want to be notified when the host is actually done processing the request.
[Kasireddy, Vivek] Correct, that is exactly what I want -- make the Guest wait until it gets notified that the Host is completely done processing/using the fb. However, there can be two resources the guest can be made to wait on: wait for the new/current fb that is being submitted to be processed (explicit flush)
That would be the wait on resource_flush case, right?
or wait for the previous fb that was submitted earlier (in the previous repaint cycle) to be processed (explicit sync).
That would be the wait on set_scanout case, right?
And it would effectively wait on the previous fb not being needed by the host any more (because the page-flip to the new fb completed) so the guest can re-use the previous fb to render the next frame, right?
(also when doing front-buffer rendering with xorg/fbcon and then doing a virtual console switch the guest could wait for the console switch being completed).
IIUC, Explicit sync only makes sense if 1) the Host windowing system also supports that feature/protocol (currently only upstream Weston does but I'd like to add it to Mutter if no one else does) or if there is a way to figure out (dma-buf sync file?) if the Host has completely processed the fb and 2) if Qemu UI is not doing a blit and instead submitting the guest fb/dmabuf directly to the Host windowing system. As you are aware, 2) can possibly be done with dbus/pipewire Qemu UI backends (I'll explore this soon) but not with GTK or SDL.
Well, I think we need to clearly define the wait flag semantics. Should resource_flush with the wait flag wait until the host is done reading the resource (blit done)? Or should it wait until the host screen has been updated (gtk draw callback completed)?
Everything else will be a host/guest implementation detail then, and of course this needs some integration with the UI on the host side and different UIs might have to do different things.
On the guest side integrating this with fences will give us enough flexibility in how we want to handle the waits. Simplest would be to just block. We could implement virtual vblanks, which would probably make most userspace work fine without explicit virtio-gpu support. If needed we could even give userspace access to the fence so it can choose how to wait.
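A virtual-vblank implementation could be modeled on what vkms does: an hrtimer armed at the refresh period drives drm_crtc_handle_vblank(), so userspace sees ordinary vblank pacing. A rough sketch; the vblank_timer field is a hypothetical addition to struct virtio_gpu_output, and the fixed 16ms period stands in for the actual host refresh rate:

static enum hrtimer_restart virtio_gpu_vblank_simulate(struct hrtimer *timer)
{
        /* vblank_timer is a hypothetical new member of virtio_gpu_output */
        struct virtio_gpu_output *output =
                container_of(timer, struct virtio_gpu_output, vblank_timer);

        /* signal a simulated vblank on this output's crtc */
        drm_crtc_handle_vblank(&output->crtc);

        /* re-arm for the next frame; ideally this would track the host rate */
        hrtimer_forward_now(timer, ms_to_ktime(16));
        return HRTIMER_RESTART;
}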
take care, Gerd
Hi Gerd,
[Kasireddy, Vivek] Correct, that is exactly what I want -- make the Guest wait until it gets notified that the Host is completely done processing/using the fb. However, there can be two resources the guest can be made to wait on: wait for the new/current fb that is being submitted to be processed (explicit flush)
That would be the wait on resource_flush case, right?
[Kasireddy, Vivek] Yes, correct.
or wait for the previous fb that was submitted earlier (in the previous repaint cycle) to be processed (explicit sync).
That would be the wait on set_scanout case, right?
[Kasireddy, Vivek] Right.
And it would effectively wait on the previous fb not being needed by the host any more (because the page-flip to the new fb completed) so the guest can re-use the previous fb to render the next frame, right?
[Kasireddy, Vivek] Yup.
(also when doing front-buffer rendering with xorg/fbcon and then doing a virtual console switch the guest could wait for the console switch being completed).
IIUC, Explicit sync only makes sense if 1) the Host windowing system also supports that feature/protocol (currently only upstream Weston does but I'd like to add it to Mutter if no one else does) or if there is a way to figure out (dma-buf sync file?) if the Host has completely processed the fb and 2) if Qemu UI is not doing a blit and instead submitting the guest fb/dmabuf directly to the Host windowing system. As you are aware, 2) can possibly be done with dbus/pipewire Qemu UI backends (I'll explore this soon) but not with GTK or SDL.
Well, I think we need to clearly define the wait flag semantics.
[Kasireddy, Vivek] At least with our passthrough use-case (maybe not with Virgl), I think we need to meet the following criteria: 1) With Blobs, ensure that the Guest and Host never use the dmabuf/FB at the same time. 2) The Guest should not render more frames than the refresh rate of the Host so that GPU resources are not wasted.
Should resource_flush with the wait flag wait until the host is done reading the resource (blit done)?
[Kasireddy, Vivek] I started with this but did not find it useful as it did not meet 2) above. However, I think we could have a flag for this if the Guest is using a virtual vblank/timer and only wants to wait until the blit is done.
Or should it wait until the host screen has been updated (gtk draw callback completed)?
[Kasireddy, Vivek] This is what the last 7 patches of my Blob series (v3) do. So, we'd want to have a separate flag for this as well. And, lastly, we are going to need another flag for the set_scanout case where we wait for the previous fb to be synchronized.
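To summarize, the three wait points discussed in this thread could be expressed as separate header flags, refining the single WAIT_DONE bit sketched earlier; the names and bit assignments below are purely illustrative:

#define VIRTIO_GPU_FLAG_WAIT_BLIT_DONE  (1 << 1) /* resource_flush: host finished reading the fb */
#define VIRTIO_GPU_FLAG_WAIT_DRAW_DONE  (1 << 2) /* resource_flush: host screen actually updated */
#define VIRTIO_GPU_FLAG_WAIT_FB_UNUSED  (1 << 3) /* set_scanout: previous fb no longer in use */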
Everything else will be a host/guest implementation detail then, and of course this needs some integration with the UI on the host side and different UIs might have to do different things.
[Kasireddy, Vivek] Sure, I think we can start with GTK and go from there.
On the guest side integrating this with fences will give us enough flexibility in how we want to handle the waits. Simplest would be to just block.
[Kasireddy, Vivek] I agree; simply blocking (dma_fence_wait) is more than enough for most use-cases.
We could implement virtual vblanks, which would probably make most userspace work fine without explicit virtio-gpu support. If needed we could even give userspace access to the fence so it can choose how to wait.
[Kasireddy, Vivek] Virtual vblanks are not a bad idea but I think blocking with fences in the Guest kernel space seems simpler. And, sharing fences with the Guest compositor is also very interesting but I suspect we might need to modify the compositor for this use-case, which might be a non-starter. Lastly, even with virtual vblanks, we still need to make sure that we meet the two criteria mentioned above.
Thanks, Vivek
Hi Gerd,

Any further comments on this?
Thanks, Vivek