Hi Nicolas,
Thanks for your suggestion. On Fri, 2022-01-28 at 16:49 -0500, Nicolas Dufresne wrote:
Hi Yunfei,
Thanks for your work, see comments below...
Le samedi 22 janvier 2022 à 11:53 +0800, Yunfei Dong a écrit :
To let lat and core decode in parallel, the capture buffer needs to be fetched when core starts decoding and put on the display list when core finishes decoding.
Signed-off-by: Yunfei Dong yunfei.dong@mediatek.com
.../mtk-vcodec/mtk_vcodec_dec_stateless.c | 121 ++++++++++++
.../platform/mtk-vcodec/mtk_vcodec_drv.h | 5 +- .../mtk-vcodec/vdec/vdec_h264_req_if.c | 16 ++- 3 files changed, 102 insertions(+), 40 deletions(-)
diff --git a/drivers/media/platform/mtk- vcodec/mtk_vcodec_dec_stateless.c b/drivers/media/platform/mtk- vcodec/mtk_vcodec_dec_stateless.c index 23a154c4e321..6d481410bf89 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c @@ -108,37 +108,87 @@ static const struct mtk_codec_framesizes mtk_vdec_framesizes[] = {
#define NUM_SUPPORTED_FRAMESIZE ARRAY_SIZE(mtk_vdec_framesizes)
-static void mtk_vdec_stateless_set_dst_payload(struct mtk_vcodec_ctx *ctx,
struct vdec_fb *fb)
+static void mtk_vdec_stateless_out_to_done(struct mtk_vcodec_ctx *ctx,
struct mtk_vcodec_mem *bs,
int error) {
- struct mtk_video_dec_buf *vdec_frame_buf =
container_of(fb, struct mtk_video_dec_buf,
frame_buffer);
- struct vb2_v4l2_buffer *vb = &vdec_frame_buf->m2m_buf.vb;
- unsigned int cap_y_size = ctx-
q_data[MTK_Q_DATA_DST].sizeimage[0];
- struct mtk_video_dec_buf *out_buf;
- struct vb2_v4l2_buffer *vb;
- vb2_set_plane_payload(&vb->vb2_buf, 0, cap_y_size);
- if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) {
unsigned int cap_c_size =
ctx->q_data[MTK_Q_DATA_DST].sizeimage[1];
- if (!bs) {
mtk_v4l2_err("Free bitstream buffer fail.");
return;
- }
- out_buf = container_of(bs, struct mtk_video_dec_buf,
bs_buffer);
- vb = &out_buf->m2m_buf.vb;
vb2_set_plane_payload(&vb->vb2_buf, 1, cap_c_size);
- mtk_v4l2_debug(2, "Free bitsteam buffer id = %d to done_list",
vb->vb2_buf.index);
- v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
- if (error) {
v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
if (error == -EIO)
out_buf->error = true;
- } else {
}v4l2_m2m_buf_done(vb, VB2_BUF_STATE_DONE);
}
-static struct vdec_fb *vdec_get_cap_buffer(struct mtk_vcodec_ctx *ctx,
struct vb2_v4l2_buffer
*vb2_v4l2) +static void mtk_vdec_stateless_cap_to_disp(struct mtk_vcodec_ctx *ctx,
struct vdec_fb *fb, int
error) {
- struct mtk_video_dec_buf *framebuf =
container_of(vb2_v4l2, struct mtk_video_dec_buf,
m2m_buf.vb);
- struct vdec_fb *pfb = &framebuf->frame_buffer;
- struct vb2_buffer *dst_buf = &vb2_v4l2->vb2_buf;
- struct mtk_video_dec_buf *vdec_frame_buf;
- struct vb2_v4l2_buffer *vb;
- unsigned int cap_y_size, cap_c_size;
- if (!fb) {
mtk_v4l2_err("Free frame buffer fail.");
return;
- }
- vdec_frame_buf = container_of(fb, struct mtk_video_dec_buf,
frame_buffer);
- vb = &vdec_frame_buf->m2m_buf.vb;
- cap_y_size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[0];
- cap_c_size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[1];
- v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
- pfb->base_y.va = NULL;
- vb2_set_plane_payload(&vb->vb2_buf, 0, cap_y_size);
- if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2)
vb2_set_plane_payload(&vb->vb2_buf, 1, cap_c_size);
- mtk_v4l2_debug(2, "Free frame buffer id = %d to done_list",
vb->vb2_buf.index);
- if (error)
v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
- else
v4l2_m2m_buf_done(vb, VB2_BUF_STATE_DONE);
+}
+static struct vdec_fb *vdec_get_cap_buffer(struct mtk_vcodec_ctx *ctx) +{
- struct mtk_video_dec_buf *framebuf;
- struct vb2_v4l2_buffer *vb2_v4l2;
- struct vb2_buffer *dst_buf;
- struct vdec_fb *pfb;
- vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
- if (!vb2_v4l2) {
mtk_v4l2_debug(1, "[%d] dst_buf empty!!", ctx->id);
return NULL;
- }
- dst_buf = &vb2_v4l2->vb2_buf;
- framebuf = container_of(vb2_v4l2, struct mtk_video_dec_buf,
m2m_buf.vb);
- pfb = &framebuf->frame_buffer;
- pfb->base_y.va = vb2_plane_vaddr(dst_buf, 0); pfb->base_y.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf,
0); pfb->base_y.size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[0];
if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) {
pfb->base_c.va = NULL;
pfb->base_c.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf, 1); pfb->base_c.size = ctx-pfb->base_c.va = vb2_plane_vaddr(dst_buf, 1);
q_data[MTK_Q_DATA_DST].sizeimage[1];
@@ -162,12 +212,11 @@ static void mtk_vdec_worker(struct work_struct *work) struct mtk_vcodec_ctx *ctx = container_of(work, struct mtk_vcodec_ctx, decode_work); struct mtk_vcodec_dev *dev = ctx->dev;
- struct vb2_v4l2_buffer *vb2_v4l2_src, *vb2_v4l2_dst;
- struct vb2_v4l2_buffer *vb2_v4l2_src; struct vb2_buffer *vb2_src; struct mtk_vcodec_mem *bs_src; struct mtk_video_dec_buf *dec_buf_src; struct media_request *src_buf_req;
- struct vdec_fb *dst_buf; bool res_chg = false; int ret;
@@ -178,13 +227,6 @@ static void mtk_vdec_worker(struct work_struct *work) return; }
- vb2_v4l2_dst = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
- if (!vb2_v4l2_dst) {
v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx);
mtk_v4l2_debug(1, "[%d] no available destination
buffer", ctx->id);
return;
- }
- vb2_src = &vb2_v4l2_src->vb2_buf; dec_buf_src = container_of(vb2_v4l2_src, struct
mtk_video_dec_buf, m2m_buf.vb); @@ -193,9 +235,15 @@ static void mtk_vdec_worker(struct work_struct *work) mtk_v4l2_debug(3, "[%d] (%d) id=%d, vb=%p", ctx->id, vb2_src->vb2_queue->type, vb2_src->index, vb2_src);
- bs_src->va = NULL;
- bs_src->va = vb2_plane_vaddr(vb2_src, 0); bs_src->dma_addr = vb2_dma_contig_plane_dma_addr(vb2_src, 0); bs_src->size = (size_t)vb2_src->planes[0].bytesused;
- if (!bs_src->va) {
v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx);
mtk_v4l2_err("[%d] id=%d source buffer is NULL", ctx-
id,
vb2_src->index);
return;
}
mtk_v4l2_debug(3, "[%d] Bitstream VA=%p DMA=%pad Size=%zx
vb=%p", ctx->id, bs_src->va, &bs_src->dma_addr, bs_src-
size, vb2_src);
@@ -206,9 +254,7 @@ static void mtk_vdec_worker(struct work_struct *work) else mtk_v4l2_err("vb2 buffer media request is NULL");
- dst_buf = vdec_get_cap_buffer(ctx, vb2_v4l2_dst);
- v4l2_m2m_buf_copy_metadata(vb2_v4l2_src, vb2_v4l2_dst, true);
Please keep using this helper, it is specially crafted to ease maintenance.
I moved this function into each codec because, with lat and core decoding in parallel, it can't be used directly in the lat workqueue.
- ret = vdec_if_decode(ctx, bs_src, dst_buf, &res_chg);
- ret = vdec_if_decode(ctx, bs_src, NULL, &res_chg); if (ret) { mtk_v4l2_err(" <===[%d], src_buf[%d] sz=0x%zx pts=%llu
vdec_if_decode() ret=%d res_chg=%d===>", ctx->id, vb2_src->index, bs_src->size, @@ -220,12 +266,9 @@ static void mtk_vdec_worker(struct work_struct *work) } }
- mtk_vdec_stateless_set_dst_payload(ctx, dst_buf);
- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev_dec, ctx-
m2m_ctx,
ret ? VB2_BUF_STATE_ERROR :
VB2_BUF_STATE_DONE);
- mtk_vdec_stateless_out_to_done(ctx, bs_src, ret);
v4l2_m2m_buf_done_and_job_finish() was specially crafted to prevent developers from implementing the signalling of the request at the wrong moment. This patch broke this strict ordering. The relevant comment in the helper function:
As we discussed in chat, please help to check whether it's possible to let lat and core decode in parallel.
I will continue to fix h264 issue.
Thanks for your help.
Best Regards, Yunfei Dong
/*
 * If the request API is being used, returning the OUTPUT
 * (src) buffer will wake-up any process waiting on the
 * request file descriptor.
 *
 * Therefore, return the CAPTURE (dst) buffer first,
 * to avoid signalling the request file descriptor
 * before the CAPTURE buffer is done.
 */
In short, as request signalling is bound to the src buffer, with this change you signal the request too early, which may lead userland to think it can DQ the capture buffer. I see exactly that happening when running with GStreamer, which strictly relies on the request polling. Please keep using v4l2_m2m_buf_done_and_job_finish(), and move it into mtk_vdec_stateless_cap_to_disp().
v4l2_ctrl_request_complete(src_buf_req, &ctx->ctrl_hdl);
- v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx);
}
static void vb2ops_vdec_stateless_buf_queue(struct vb2_buffer *vb) @@ -358,6 +401,8 @@ const struct mtk_vcodec_dec_pdata mtk_vdec_8183_pdata = { .uses_stateless_api = true, .worker = mtk_vdec_worker, .flush_decoder = mtk_vdec_flush_decoder,
- .cap_to_disp = mtk_vdec_stateless_cap_to_disp,
- .get_cap_buffer = vdec_get_cap_buffer, .is_subdev_supported = false, .hw_arch = MTK_VDEC_PURE_SINGLE_CORE,
}; @@ -376,6 +421,8 @@ const struct mtk_vcodec_dec_pdata mtk_lat_sig_core_pdata = { .uses_stateless_api = true, .worker = mtk_vdec_worker, .flush_decoder = mtk_vdec_flush_decoder,
- .cap_to_disp = mtk_vdec_stateless_cap_to_disp,
- .get_cap_buffer = vdec_get_cap_buffer, .is_subdev_supported = true, .hw_arch = MTK_VDEC_LAT_SINGLE_CORE,
}; diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h index 2d1d878692ca..e0b7d2fda632 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h @@ -353,7 +353,8 @@ enum mtk_vdec_hw_arch {
- @ctrls_setup: init vcodec dec ctrls
- @worker: worker to start a decode job
- @flush_decoder: function that flushes the decoder
- @get_cap_buffer: get capture buffer from capture queue
- @cap_to_disp: put capture buffer to disp list
- @vdec_vb2_ops: struct vb2_ops
- @vdec_formats: supported video decoder formats
@@ -375,6 +376,8 @@ struct mtk_vcodec_dec_pdata { int (*ctrls_setup)(struct mtk_vcodec_ctx *ctx); void (*worker)(struct work_struct *work); int (*flush_decoder)(struct mtk_vcodec_ctx *ctx);
- struct vdec_fb *(*get_cap_buffer)(struct mtk_vcodec_ctx *ctx);
- void (*cap_to_disp)(struct mtk_vcodec_ctx *ctx, struct vdec_fb
*fb, int error);
struct vb2_ops *vdec_vb2_ops;
diff --git a/drivers/media/platform/mtk- vcodec/vdec/vdec_h264_req_if.c b/drivers/media/platform/mtk- vcodec/vdec/vdec_h264_req_if.c index 43542de11e9c..36f3dc1fbe3b 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_req_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_req_if.c @@ -670,32 +670,42 @@ static void vdec_h264_slice_deinit(void *h_vdec) }
static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
struct vdec_fb *fb, bool *res_chg)
struct vdec_fb *unused, bool
*res_chg) { struct vdec_h264_slice_inst *inst = h_vdec; const struct v4l2_ctrl_h264_decode_params *dec_params = get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); struct vdec_vpu_inst *vpu = &inst->vpu;
struct mtk_video_dec_buf *src_buf_info;
struct mtk_video_dec_buf *dst_buf_info;
struct vdec_fb *fb; u32 data[2]; u64 y_fb_dma; u64 c_fb_dma; int err;
inst->num_nalu++; /* bs NULL means flush decoder */ if (!bs) return vpu_dec_reset(vpu);
fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx);
src_buf_info = container_of(bs, struct mtk_video_dec_buf,
bs_buffer);
- dst_buf_info = container_of(fb, struct mtk_video_dec_buf,
frame_buffer);
y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
mtk_vcodec_debug(inst, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p",
++inst->num_nalu, y_fb_dma, c_fb_dma, fb);
inst->num_nalu, y_fb_dma, c_fb_dma, fb);
inst->vsi_ctx.dec.bs_dma = (uint64_t)bs->dma_addr; inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb;
v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb,
&dst_buf_info->m2m_buf.vb, true);
get_vdec_decode_parameters(inst); data[0] = bs->size; /*
@@ -734,6 +744,8 @@ static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); mtk_vcodec_debug(inst, "\n - NALU[%d]", inst->num_nalu);
- inst->ctx->dev->vdec_pdata->cap_to_disp(inst->ctx, fb, 0); return 0;
err_free_fb_out: