Subject: [PATCH 0/3] Fix drm suspend and resume issue
1. When the hardware components on the crtc are using GCE to configure their registers, cmdq_suspend may be called. So add cmdq_mbox_flush to clear all tasks and release the GCE clocks before cmdq_suspend.
2. The suspend and resume order of the components on the crtc can be guaranteed by sharing the same power-domain, but that of cmdq cannot. So add a devlink to the cmdq dev to make sure the order of suspend and resume is correct: cmdq_suspend runs later than drm_suspend and cmdq_resume runs earlier than drm_resume.
jason-jh.lin (3): mialbox: move cmdq suspend,resume and remove after cmdq_mbox_flush mailbox: add cmdq_mbox_flush to clear all task before suspend drm/mediatek: add devlink to cmdq dev
drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 13 ++++ drivers/mailbox/mtk-cmdq-mailbox.c | 81 ++++++++++++------------- 2 files changed, 52 insertions(+), 42 deletions(-)
Reorder the functions to make sure cmdq_mbox_flush is declared before cmdq_suspend calls it.
Signed-off-by: jason-jh.lin jason-jh.lin@mediatek.com --- drivers/mailbox/mtk-cmdq-mailbox.c | 84 +++++++++++++++--------------- 1 file changed, 42 insertions(+), 42 deletions(-)
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index c591dab9d5a4..03f9ed4c5131 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -296,48 +296,6 @@ static irqreturn_t cmdq_irq_handler(int irq, void *dev) return IRQ_HANDLED; }
-static int cmdq_suspend(struct device *dev) -{ - struct cmdq *cmdq = dev_get_drvdata(dev); - struct cmdq_thread *thread; - int i; - bool task_running = false; - - cmdq->suspended = true; - - for (i = 0; i < cmdq->thread_nr; i++) { - thread = &cmdq->thread[i]; - if (!list_empty(&thread->task_busy_list)) { - task_running = true; - break; - } - } - - if (task_running) - dev_warn(dev, "exist running task(s) in suspend\n"); - - clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); - - return 0; -} - -static int cmdq_resume(struct device *dev) -{ - struct cmdq *cmdq = dev_get_drvdata(dev); - - WARN_ON(clk_bulk_prepare(cmdq->gce_num, cmdq->clocks)); - cmdq->suspended = false; - return 0; -} - -static int cmdq_remove(struct platform_device *pdev) -{ - struct cmdq *cmdq = platform_get_drvdata(pdev); - - clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); - return 0; -} - static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) { struct cmdq_pkt *pkt = (struct cmdq_pkt *)data; @@ -521,6 +479,48 @@ static struct mbox_chan *cmdq_xlate(struct mbox_controller *mbox, return &mbox->chans[ind]; }
+static int cmdq_suspend(struct device *dev) +{ + struct cmdq *cmdq = dev_get_drvdata(dev); + struct cmdq_thread *thread; + int i; + bool task_running = false; + + cmdq->suspended = true; + + for (i = 0; i < cmdq->thread_nr; i++) { + thread = &cmdq->thread[i]; + if (!list_empty(&thread->task_busy_list)) { + task_running = true; + break; + } + } + + if (task_running) + dev_warn(dev, "exist running task(s) in suspend\n"); + + clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); + + return 0; +} + +static int cmdq_resume(struct device *dev) +{ + struct cmdq *cmdq = dev_get_drvdata(dev); + + WARN_ON(clk_bulk_prepare(cmdq->gce_num, cmdq->clocks)); + cmdq->suspended = false; + return 0; +} + +static int cmdq_remove(struct platform_device *pdev) +{ + struct cmdq *cmdq = platform_get_drvdata(pdev); + + clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); + return 0; +} + static int cmdq_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev;
On Wed, Nov 17, 2021 at 02:41:56PM +0800, jason-jh.lin wrote:
Typo in the commit title "mialbox: move cmdq suspend,resume and remove after cmdq_mbox_flush".
s/mialbox/mailbox/
Hi Tzung-Bi,
Thanks for the reviews.
On Thu, 2021-11-18 at 13:55 +0800, Tzung-Bi Shih wrote:
On Wed, Nov 17, 2021 at 02:41:56PM +0800, jason-jh.lin wrote:
Typo in the commit title "mialbox: move cmdq suspend,resume and remove after cmdq_mbox_flush".
s/mialbox/mailbox/
CMDQ driver will occupy GCE clock to execute the task in GCE thread.
So call cmdq_mbox_flush to clear all task in GCE thread before CMDQ suspend.
Signed-off-by: jason-jh.lin jason-jh.lin@mediatek.com --- drivers/mailbox/mtk-cmdq-mailbox.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 03f9ed4c5131..28cadfc0091b 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -484,21 +484,18 @@ static int cmdq_suspend(struct device *dev) struct cmdq *cmdq = dev_get_drvdata(dev); struct cmdq_thread *thread; int i; - bool task_running = false;
cmdq->suspended = true;
for (i = 0; i < cmdq->thread_nr; i++) { thread = &cmdq->thread[i]; if (!list_empty(&thread->task_busy_list)) { - task_running = true; - break; + /* try to clear all task in this thread */ + cmdq_mbox_flush(thread->chan, 2000); + dev_warn(dev, "thread[%d] exist running task(s) in suspend\n", i); } }
- if (task_running) - dev_warn(dev, "exist running task(s) in suspend\n"); - clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks);
return 0;
Hi, Jason:
jason-jh.lin jason-jh.lin@mediatek.com 於 2021年11月17日 週三 下午2:42寫道:
CMDQ driver will occupy GCE clock to execute the task in GCE thread.
So call cmdq_mbox_flush to clear all task in GCE thread before CMDQ suspend.
Signed-off-by: jason-jh.lin jason-jh.lin@mediatek.com
drivers/mailbox/mtk-cmdq-mailbox.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 03f9ed4c5131..28cadfc0091b 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -484,21 +484,18 @@ static int cmdq_suspend(struct device *dev) struct cmdq *cmdq = dev_get_drvdata(dev); struct cmdq_thread *thread; int i;
bool task_running = false; cmdq->suspended = true; for (i = 0; i < cmdq->thread_nr; i++) { thread = &cmdq->thread[i]; if (!list_empty(&thread->task_busy_list)) {
task_running = true;
break;
/* try to clear all task in this thread */
cmdq_mbox_flush(thread->chan, 2000);
I would like the normal control flow rather than error handling. So the normal control flow is:
1. Client driver suspend: Flush command. 2. CMDQ driver suspend: There is no command to flush. If there are command, show error message and debug the client driver.
The error handling flow:
1. Client driver suspend: Does not flush command. 2. CMDQ driver suspend: Flush command and callback to client driver. Client driver process these callback as error handling.
The client driver may integrate multiple driver. In the suspend flow, it may need to stop these driver in a sequence such as.
1. Stop driver 1 2. Stop driver 2 3. Stop driver 3 (cmdq) 4. Stop driver 4 5. Stop driver 5.
In the normal flow, client driver could control the stop flow. In the error handling flow, it does not match the stop flow.
Regards, Chun-Kuang.
dev_warn(dev, "thread[%d] exist running task(s) in suspend\n", i); } }
if (task_running)
dev_warn(dev, "exist running task(s) in suspend\n");
clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); return 0;
-- 2.18.0
Hi Chun-Kuang,
Thanks for the reviews.
On Fri, 2021-11-19 at 08:01 +0800, Chun-Kuang Hu wrote:
Hi, Jason:
jason-jh.lin jason-jh.lin@mediatek.com 於 2021年11月17日 週三 下午2:42寫道:
CMDQ driver will occupy GCE clock to execute the task in GCE thread.
So call cmdq_mbox_flush to clear all task in GCE thread before CMDQ suspend.
Signed-off-by: jason-jh.lin jason-jh.lin@mediatek.com
drivers/mailbox/mtk-cmdq-mailbox.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 03f9ed4c5131..28cadfc0091b 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -484,21 +484,18 @@ static int cmdq_suspend(struct device *dev) struct cmdq *cmdq = dev_get_drvdata(dev); struct cmdq_thread *thread; int i;
bool task_running = false; cmdq->suspended = true; for (i = 0; i < cmdq->thread_nr; i++) { thread = &cmdq->thread[i]; if (!list_empty(&thread->task_busy_list)) {
task_running = true;
break;
/* try to clear all task in this thread */
cmdq_mbox_flush(thread->chan, 2000);
I would like the normal control flow rather than error handling. So the normal control flow is:
- Client driver suspend: Flush command.
- CMDQ driver suspend: There is no command to flush. If there are
command, show error message and debug the client driver.
The error handling flow:
- Client driver suspend: Does not flush command.
- CMDQ driver suspend: Flush command and callback to client driver.
Client driver process these callback as error handling.
The client driver may integrate multiple driver. In the suspend flow, it may need to stop these driver in a sequence such as.
- Stop driver 1
- Stop driver 2
- Stop driver 3 (cmdq)
- Stop driver 4
- Stop driver 5.
In the normal flow, client driver could control the stop flow. In the error handling flow, it does not match the stop flow.
Regards, Chun-Kuang.
I have tried the normal flow: 1. Client driver suspend: Flush command. 2. CMDQ driver suspend: There is no command to flush. If there are command, show error message and debug the client driver.
Then I found that the cmdq task of crtc_1 is sometimes still executing when cmdq_suspend is called. I think it is the last async cmd sent in mtk_drm_crtc_disable. So I'll try changing it to a blocking cmd and see whether the issue still happens.
If it works, I'll revert this patch and add a blocking cmd in mtk_drm_crtc_disable in the next version.
Regards, Jason-JH.Lin
dev_warn(dev, "thread[%d] exist running
task(s) in suspend\n", i); } }
if (task_running)
dev_warn(dev, "exist running task(s) in
suspend\n");
clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks); return 0;
-- 2.18.0
Add devlink to cmdq to make sure the order of suspend and resume is correct.
Signed-off-by: jason-jh.lin jason-jh.lin@mediatek.com --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 24d9bde4d6e2..0a472719709d 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -59,6 +59,7 @@ struct mtk_drm_crtc { #endif
struct device *mmsys_dev; + struct device *drm_dev; struct mtk_mutex *mutex; unsigned int ddp_comp_nr; struct mtk_ddp_comp **ddp_comp; @@ -158,6 +159,7 @@ static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle);
if (mtk_crtc->cmdq_client.chan) { + device_link_remove(mtk_crtc->drm_dev, mtk_crtc->cmdq_client.chan->mbox->dev); mbox_free_channel(mtk_crtc->cmdq_client.chan); mtk_crtc->cmdq_client.chan = NULL; } @@ -888,6 +890,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, return -ENOMEM;
mtk_crtc->mmsys_dev = priv->mmsys_dev; + mtk_crtc->drm_dev = dev; mtk_crtc->ddp_comp_nr = path_len; mtk_crtc->ddp_comp = devm_kmalloc_array(dev, mtk_crtc->ddp_comp_nr, sizeof(*mtk_crtc->ddp_comp), @@ -956,6 +959,16 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, }
if (mtk_crtc->cmdq_client.chan) { + struct device_link *link; + + /* add devlink to cmdq dev to make sure suspend/resume order is correct */ + link = device_link_add(dev, mtk_crtc->cmdq_client.chan->mbox->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + if (!link) { + dev_err(dev, "Unable to link dev=%s\n", + dev_name(mtk_crtc->cmdq_client.chan->mbox->dev)); + } + ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base),
On Wed, Nov 17, 2021 at 02:41:58PM +0800, jason-jh.lin wrote:
@@ -158,6 +159,7 @@ static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle);
if (mtk_crtc->cmdq_client.chan) {
device_link_remove(mtk_crtc->drm_dev, mtk_crtc->cmdq_client.chan->mbox->dev); mbox_free_channel(mtk_crtc->cmdq_client.chan); mtk_crtc->cmdq_client.chan = NULL; }
[...]
@@ -956,6 +959,16 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, }
if (mtk_crtc->cmdq_client.chan) {
struct device_link *link;
/* add devlink to cmdq dev to make sure suspend/resume order is correct */
link = device_link_add(dev, mtk_crtc->cmdq_client.chan->mbox->dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
if (!link) {
dev_err(dev, "Unable to link dev=%s\n",
dev_name(mtk_crtc->cmdq_client.chan->mbox->dev));
}
If device_link_add() failed, doesn't mtk_drm_crtc_create() need to return an error and exit?
OTOH, if device_link_add() failed, wouldn't calling device_link_remove() cause side effects?
Hi Tzung-Bi,
Thanks, I'll fix it.
On Thu, 2021-11-18 at 13:54 +0800, Tzung-Bi Shih wrote:
On Wed, Nov 17, 2021 at 02:41:55PM +0800, jason-jh.lin wrote:
Subject: [PATCH 0/3] Fix drm suspend and resume issue
You have 2 Subjects. The first one takes precedence.
dri-devel@lists.freedesktop.org