system crash due to NOC fatal error if any module initialization faield during msm_drm_init()
Kuogee Hsieh (2): drm/msm: enable msm irq after all initializations are done successfully at msm_drm_init() drm/msm/dp: check core_initialized before disable interrupts at dp_display_unbind()
drivers/gpu/drm/msm/dp/dp_display.c | 3 ++- drivers/gpu/drm/msm/msm_drv.c | 29 ++++++----------------------- 2 files changed, 8 insertions(+), 24 deletions(-)
At msm initialize phase, msm_drm_init() is called to initialize modules sequentially. It will call msm_drm_uninit() to clean up initialized phase if any module initialization return failed. This patch move msm_irq_install() to the last step of msm_drm_init() after all modules are initialized successfully so that no msm_irq_unistall() required at msm_drm_uninit() if any module initialization failed happen at msm_drm_init().
Signed-off-by: Kuogee Hsieh quic_khsieh@quicinc.com --- drivers/gpu/drm/msm/msm_drv.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 6665c5a..ab42e9a 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -132,15 +132,6 @@ static int msm_irq_install(struct drm_device *dev, unsigned int irq) return 0; }
-static void msm_irq_uninstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - - kms->funcs->irq_uninstall(kms); - free_irq(kms->irq, dev); -} - struct msm_vblank_work { struct work_struct work; int crtc_id; @@ -232,10 +223,6 @@ static int msm_drm_uninit(struct device *dev)
drm_mode_config_cleanup(ddev);
- pm_runtime_get_sync(dev); - msm_irq_uninstall(ddev); - pm_runtime_put_sync(dev); - if (kms && kms->funcs) kms->funcs->destroy(kms);
@@ -437,16 +424,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) goto err_msm_uninit; }
- if (kms) { - pm_runtime_get_sync(dev); - ret = msm_irq_install(ddev, kms->irq); - pm_runtime_put_sync(dev); - if (ret < 0) { - DRM_DEV_ERROR(dev, "failed to install IRQ handler\n"); - goto err_msm_uninit; - } - } - ret = drm_dev_register(ddev, 0); if (ret) goto err_msm_uninit; @@ -467,6 +444,12 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit;
+ if (kms) { + pm_runtime_get_sync(dev); + msm_irq_install(ddev, kms->irq); + pm_runtime_put_sync(dev); + } + drm_kms_helper_poll_init(ddev);
return 0;
I will drop this patch since the problem had been fixed by Dmitry Baryshkov patch.
https://gitlab.freedesktop.org/drm/msm/-/commit/577e2a9dfc8fba7938aaf75db63f...
On 6/3/2022 2:28 PM, Kuogee Hsieh wrote:
At msm initialize phase, msm_drm_init() is called to initialize modules sequentially. It will call msm_drm_uninit() to clean up initialized phase if any module initialization return failed. This patch move msm_irq_install() to the last step of msm_drm_init() after all modules are initialized successfully so that no msm_irq_unistall() required at msm_drm_uninit() if any module initialization failed happen at msm_drm_init().
Signed-off-by: Kuogee Hsieh quic_khsieh@quicinc.com
drivers/gpu/drm/msm/msm_drv.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 6665c5a..ab42e9a 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -132,15 +132,6 @@ static int msm_irq_install(struct drm_device *dev, unsigned int irq) return 0; }
-static void msm_irq_uninstall(struct drm_device *dev) -{
- struct msm_drm_private *priv = dev->dev_private;
- struct msm_kms *kms = priv->kms;
- kms->funcs->irq_uninstall(kms);
- free_irq(kms->irq, dev);
-}
- struct msm_vblank_work { struct work_struct work; int crtc_id;
@@ -232,10 +223,6 @@ static int msm_drm_uninit(struct device *dev)
drm_mode_config_cleanup(ddev);
- pm_runtime_get_sync(dev);
- msm_irq_uninstall(ddev);
- pm_runtime_put_sync(dev);
- if (kms && kms->funcs) kms->funcs->destroy(kms);
@@ -437,16 +424,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) goto err_msm_uninit; }
- if (kms) {
pm_runtime_get_sync(dev);
ret = msm_irq_install(ddev, kms->irq);
pm_runtime_put_sync(dev);
if (ret < 0) {
DRM_DEV_ERROR(dev, "failed to install IRQ handler\n");
goto err_msm_uninit;
}
- }
- ret = drm_dev_register(ddev, 0); if (ret) goto err_msm_uninit;
@@ -467,6 +444,12 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit;
if (kms) {
pm_runtime_get_sync(dev);
msm_irq_install(ddev, kms->irq);
pm_runtime_put_sync(dev);
}
drm_kms_helper_poll_init(ddev);
return 0;
During msm initialize phase, dp_display_unbind() will be called to undo initializations had been done by dp_display_bind() previously if there is error happen at msm_drm_bind. In this case, core_initialized flag had to be check to make sure clocks is on before update DP controller register to disable HPD interrupts. Otherwise system will crash due to below NOC fatal error.
QTISECLIB [01f01a7ad]CNOC2 ERROR: ERRLOG0_LOW = 0x00061007 QTISECLIB [01f01a7ad]GEM_NOC ERROR: ERRLOG0_LOW = 0x00001007 QTISECLIB [01f0371a0]CNOC2 ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f055297]GEM_NOC ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f072beb]CNOC2 ERROR: ERRLOG1_LOW = 0x00000024 QTISECLIB [01f0914b8]GEM_NOC ERROR: ERRLOG1_LOW = 0x00000042 QTISECLIB [01f0ae639]CNOC2 ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0cc73f]GEM_NOC ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0ea092]CNOC2 ERROR: ERRLOG2_LOW = 0x0009020c QTISECLIB [01f10895f]GEM_NOC ERROR: ERRLOG2_LOW = 0x0ae9020c QTISECLIB [01f125ae1]CNOC2 ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f143be7]GEM_NOC ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f16153a]CNOC2 ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f17fe07]GEM_NOC ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f19cf89]CNOC2 ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1bb08e]GEM_NOC ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1d8a31]CNOC2 ERROR: SBM1 FAULTINSTATUS0_LOW = 0x00000002 QTISECLIB [01f1f72a4]GEM_NOC ERROR: SBM0 FAULTINSTATUS0_LOW = 0x00000001 QTISECLIB [01f21a217]CNOC3 ERROR: ERRLOG0_LOW = 0x00000006 QTISECLIB [01f23dfd3]NOC error fatal
Signed-off-by: Kuogee Hsieh quic_khsieh@quicinc.com --- drivers/gpu/drm/msm/dp/dp_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 69afb25..af233c9 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -309,7 +309,8 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct msm_drm_private *priv = dev_get_drvdata(master);
/* disable all HPD interrupts */ - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + if (dp->core_initialized) + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
kthread_stop(dp->ev_tsk);
dri-devel@lists.freedesktop.org