This is motivated by having meaningful ftrace events, but it also fixes use cases where dma_fence_is_later is called, such as in sync_file_merge.
In other drivers, fence creation and cmdbuf submission normally happen atomically,
mutex_lock(); fence = dma_fence_create(..., ++timeline->seqno); submit_cmdbuf(); mutex_unlock();
and have no such issue. But in our driver, because most ioctls queue commands into ctrlq, we do not want to grab a lock. Instead, we set seqno to 0 when a fence is created, and update it when the command is finally queued and the seqno is known.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com> --- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 - drivers/gpu/drm/virtio/virtgpu_fence.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 491dec0712b3..90461feeafdb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -102,7 +102,6 @@ struct virtio_gpu_fence { struct dma_fence f; struct virtio_gpu_fence_driver *drv; struct list_head node; - uint64_t seq; }; #define to_virtio_fence(x) \ container_of(x, struct virtio_gpu_fence, f) diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c index 87d1966192f4..72b4f7561432 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -40,16 +40,14 @@ bool virtio_fence_signaled(struct dma_fence *f) { struct virtio_gpu_fence *fence = to_virtio_fence(f);
- if (atomic64_read(&fence->drv->last_seq) >= fence->seq) + if (atomic64_read(&fence->drv->last_seq) >= fence->f.seqno) return true; return false; }
static void virtio_fence_value_str(struct dma_fence *f, char *str, int size) { - struct virtio_gpu_fence *fence = to_virtio_fence(f); - - snprintf(str, size, "%llu", fence->seq); + snprintf(str, size, "%llu", f->seqno); }
static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size) @@ -76,6 +74,11 @@ struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev) return fence;
fence->drv = drv; + + /* This only partially initializes the fence because the seqno is + * unknown yet. The fence must not be used outside of the driver + * until virtio_gpu_fence_emit is called. + */ dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock, drv->context, 0);
return fence; @@ -89,13 +92,13 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, unsigned long irq_flags;
spin_lock_irqsave(&drv->lock, irq_flags); - fence->seq = ++drv->sync_seq; + fence->f.seqno = ++drv->sync_seq; dma_fence_get(&fence->f); list_add_tail(&fence->node, &drv->fences); spin_unlock_irqrestore(&drv->lock, irq_flags);
cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE); - cmd_hdr->fence_id = cpu_to_le64(fence->seq); + cmd_hdr->fence_id = cpu_to_le64(fence->f.seqno); return 0; }
@@ -109,7 +112,7 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, spin_lock_irqsave(&drv->lock, irq_flags); atomic64_set(&vgdev->fence_drv.last_seq, last_seq); list_for_each_entry_safe(fence, tmp, &drv->fences, node) { - if (last_seq < fence->seq) + if (last_seq < fence->f.seqno) continue; dma_fence_signal_locked(&fence->f); list_del(&fence->node);
For most drivers, dma_fence_init is followed immediately by the dma_fence_emit trace event. But in our driver, they happen separately. We also don't know the fence seqno until virtio_gpu_fence_emit is called, so that is where the event is traced.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com> --- drivers/gpu/drm/virtio/virtgpu_fence.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c index 72b4f7561432..e22918ca621b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -24,6 +24,7 @@ */
#include <drm/drmP.h> +#include <trace/events/dma_fence.h> #include "virtgpu_drv.h"
static const char *virtio_get_driver_name(struct dma_fence *f) @@ -97,6 +98,8 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, list_add_tail(&fence->node, &drv->fences); spin_unlock_irqrestore(&drv->lock, irq_flags);
+ trace_dma_fence_emit(&fence->f); + cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE); cmd_hdr->fence_id = cpu_to_le64(fence->f.seqno); return 0;
Trace when commands are queued for both ctrlq and cursorq. Trace when responses are received for ctrlq.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com> --- drivers/gpu/drm/virtio/Makefile | 2 +- drivers/gpu/drm/virtio/virtgpu_trace.h | 52 +++++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_trace_points.c | 5 ++ drivers/gpu/drm/virtio/virtgpu_vq.c | 10 ++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/virtio/virtgpu_trace.h create mode 100644 drivers/gpu/drm/virtio/virtgpu_trace_points.c
diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile index 4e90cc8fa651..42949a17ff70 100644 --- a/drivers/gpu/drm/virtio/Makefile +++ b/drivers/gpu/drm/virtio/Makefile @@ -6,6 +6,6 @@ virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_gem.o \ virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \ virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \ - virtgpu_ioctl.o virtgpu_prime.o + virtgpu_ioctl.o virtgpu_prime.o virtgpu_trace_points.o
obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio-gpu.o diff --git a/drivers/gpu/drm/virtio/virtgpu_trace.h b/drivers/gpu/drm/virtio/virtgpu_trace.h new file mode 100644 index 000000000000..711ecc2bd241 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_trace.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_VIRTGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _VIRTGPU_TRACE_H_ + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM virtio_gpu +#define TRACE_INCLUDE_FILE virtgpu_trace + +DECLARE_EVENT_CLASS(virtio_gpu_cmd, + TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr), + TP_ARGS(vq, hdr), + TP_STRUCT__entry( + __field(int, dev) + __field(unsigned int, vq) + __field(const char *, name) + __field(u32, type) + __field(u32, flags) + __field(u64, fence_id) + __field(u32, ctx_id) + ), + TP_fast_assign( + __entry->dev = vq->vdev->index; + __entry->vq = vq->index; + __entry->name = vq->name; + __entry->type = le32_to_cpu(hdr->type); + __entry->flags = le32_to_cpu(hdr->flags); + __entry->fence_id = le64_to_cpu(hdr->fence_id); + __entry->ctx_id = le32_to_cpu(hdr->ctx_id); + ), + TP_printk("vdev=%d vq=%u name=%s type=0x%x flags=0x%x fence_id=%llu ctx_id=%u", + __entry->dev, __entry->vq, __entry->name, + __entry->type, __entry->flags, __entry->fence_id, + __entry->ctx_id) +); + +DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_queue, + TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr), + TP_ARGS(vq, hdr) +); + +DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_response, + TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr), + TP_ARGS(vq, hdr) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/virtio +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/virtio/virtgpu_trace_points.c b/drivers/gpu/drm/virtio/virtgpu_trace_points.c new file mode 100644 index 000000000000..1970cb6f24ef --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_trace_points.c @@ 
-0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "virtgpu_drv.h" + +#define CREATE_TRACE_POINTS +#include "virtgpu_trace.h" diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index e62fe24b1a2e..2c5eeccb88c0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -28,6 +28,7 @@
#include <drm/drmP.h> #include "virtgpu_drv.h" +#include "virtgpu_trace.h" #include <linux/virtio.h> #include <linux/virtio_config.h> #include <linux/virtio_ring.h> @@ -192,6 +193,9 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
list_for_each_entry_safe(entry, tmp, &reclaim_list, list) { resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf; + + trace_virtio_gpu_cmd_response(vgdev->ctrlq.vq, resp); + if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) { if (resp->type >= cpu_to_le32(VIRTIO_GPU_RESP_ERR_UNSPEC)) { struct virtio_gpu_ctrl_hdr *cmd; @@ -284,6 +288,9 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev, spin_lock(&vgdev->ctrlq.qlock); goto retry; } else { + trace_virtio_gpu_cmd_queue(vq, + (struct virtio_gpu_ctrl_hdr *)vbuf->buf); + virtqueue_kick(vq); }
@@ -359,6 +366,9 @@ static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev, spin_lock(&vgdev->cursorq.qlock); goto retry; } else { + trace_virtio_gpu_cmd_queue(vq, + (struct virtio_gpu_ctrl_hdr *)vbuf->buf); + virtqueue_kick(vq); }
(Add missing CCs)
On Mon, Apr 29, 2019 at 3:08 PM Chia-I Wu <olvaffe@gmail.com> wrote:
This is motivated by having meaningful ftrace events, but it also fixes use cases where dma_fence_is_later is called, such as in sync_file_merge.
In other drivers, fence creation and cmdbuf submission normally happen atomically,
mutex_lock(); fence = dma_fence_create(..., ++timeline->seqno); submit_cmdbuf(); mutex_unlock();
and have no such issue. But in our driver, because most ioctls queue commands into ctrlq, we do not want to grab a lock. Instead, we set seqno to 0 when a fence is created, and update it when the command is finally queued and the seqno is known.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
drivers/gpu/drm/virtio/virtgpu_drv.h | 1 - drivers/gpu/drm/virtio/virtgpu_fence.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 491dec0712b3..90461feeafdb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -102,7 +102,6 @@ struct virtio_gpu_fence { struct dma_fence f; struct virtio_gpu_fence_driver *drv; struct list_head node;
- uint64_t seq;
}; #define to_virtio_fence(x) \ container_of(x, struct virtio_gpu_fence, f) diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c index 87d1966192f4..72b4f7561432 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -40,16 +40,14 @@ bool virtio_fence_signaled(struct dma_fence *f) { struct virtio_gpu_fence *fence = to_virtio_fence(f);
- if (atomic64_read(&fence->drv->last_seq) >= fence->seq)
+ if (atomic64_read(&fence->drv->last_seq) >= fence->f.seqno) return true; return false;
}
static void virtio_fence_value_str(struct dma_fence *f, char *str, int size) {
- struct virtio_gpu_fence *fence = to_virtio_fence(f);
- snprintf(str, size, "%llu", fence->seq);
+ snprintf(str, size, "%llu", f->seqno);
}
static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size) @@ -76,6 +74,11 @@ struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev) return fence;
fence->drv = drv;
+ /* This only partially initializes the fence because the seqno is
+ * unknown yet. The fence must not be used outside of the driver
+ * until virtio_gpu_fence_emit is called.
+ */ dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock, drv->context, 0); return fence;
@@ -89,13 +92,13 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, unsigned long irq_flags;
spin_lock_irqsave(&drv->lock, irq_flags);
- fence->seq = ++drv->sync_seq;
+ fence->f.seqno = ++drv->sync_seq; dma_fence_get(&fence->f); list_add_tail(&fence->node, &drv->fences); spin_unlock_irqrestore(&drv->lock, irq_flags); cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE);
- cmd_hdr->fence_id = cpu_to_le64(fence->seq);
+ cmd_hdr->fence_id = cpu_to_le64(fence->f.seqno); return 0;
}
@@ -109,7 +112,7 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, spin_lock_irqsave(&drv->lock, irq_flags); atomic64_set(&vgdev->fence_drv.last_seq, last_seq); list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
- if (last_seq < fence->seq)
+ if (last_seq < fence->f.seqno) continue; dma_fence_signal_locked(&fence->f); list_del(&fence->node);
-- 2.21.0.593.g511ec345e18-goog
Hi Chia-I,
On Mon, 29 Apr 2019 at 23:08, Chia-I Wu <olvaffe@gmail.com> wrote:
This is motivated by having meaningful ftrace events, but it also fixes use cases where dma_fence_is_later is called, such as in sync_file_merge.
In other drivers, fence creation and cmdbuf submission normally happen atomically,
mutex_lock(); fence = dma_fence_create(..., ++timeline->seqno); submit_cmdbuf(); mutex_unlock();
and have no such issue. But in our driver, because most ioctls queue commands into ctrlq, we do not want to grab a lock. Instead, we set seqno to 0 when a fence is created, and update it when the command is finally queued and the seqno is known.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
The series looks great. For the lot: Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
-Emil
On Mon, Apr 29, 2019 at 03:08:23PM -0700, Chia-I Wu wrote:
This is motivated by having meaningful ftrace events, but it also fixes use cases where dma_fence_is_later is called, such as in sync_file_merge.
In other drivers, fence creation and cmdbuf submission normally happen atomically,
mutex_lock(); fence = dma_fence_create(..., ++timeline->seqno); submit_cmdbuf(); mutex_unlock();
and have no such issue. But in our driver, because most ioctls queue commands into ctrlq, we do not want to grab a lock. Instead, we set seqno to 0 when a fence is created, and update it when the command is finally queued and the seqno is known.
Series pushed to drm-misc-next.
thanks, Gerd
dri-devel@lists.freedesktop.org