in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs, and etnaviv_gpu_init the call to pm_runtime_get_sync increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning.
Signed-off-by: Navid Emamdoost navid.emamdoost@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a31eeff2b297..16f5bc65771a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) { dev_err(gpu->dev, "Failed to enable GPU power domain\n"); - return ret; + goto pm_put; }
etnaviv_hw_identify(gpu); @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
fail: pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) - return ret; + goto pm_put;
dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW); dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH); @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) ret = 0;
pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) dev_err(gpu->dev, "recover hung GPU!\n");
if (pm_runtime_get_sync(gpu->dev) < 0) - return; + goto pm_put;
mutex_lock(&gpu->lock);
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev); }
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
if (!submit->runtime_resumed) { ret = pm_runtime_get_sync(gpu->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(gpu->dev); return NULL; + } submit->runtime_resumed = true; }
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n"); + pm_runtime_put(gpu->dev); return NULL; }
On Sun, Jun 14, 2020 at 9:48 AM Navid Emamdoost navid.emamdoost@gmail.com wrote:
...
if (ret < 0) {
pm_runtime_put(gpu->dev);
Please, in all your patches fix this to be _put_noidle(). We wouldn't bear the flag day of fixing these parts again. Yes, I know that *now* behaviour is the same, but calling put here is slightly inconsistent.
...
pm_runtime_put(gpu->dev);
in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs, and etnaviv_gpu_init the call to pm_runtime_get_sync increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning.
Signed-off-by: Navid Emamdoost navid.emamdoost@gmail.com --- Changes in v2: - replace pm_runtime_put with pm_runtime_put_noidle --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a31eeff2b297..7c9f3f9ba123 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) { dev_err(gpu->dev, "Failed to enable GPU power domain\n"); - return ret; + goto pm_put; }
etnaviv_hw_identify(gpu); @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
fail: pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) - return ret; + goto pm_put;
dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW); dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH); @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) ret = 0;
pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) dev_err(gpu->dev, "recover hung GPU!\n");
if (pm_runtime_get_sync(gpu->dev) < 0) - return; + goto pm_put;
mutex_lock(&gpu->lock);
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev); }
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
if (!submit->runtime_resumed) { ret = pm_runtime_get_sync(gpu->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(gpu->dev); return NULL; + } submit->runtime_resumed = true; }
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n"); + pm_runtime_put_noidle(gpu->dev); return NULL; }
Hi Navid,
Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs, and etnaviv_gpu_init the call to pm_runtime_get_sync increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning.
While that change is correct with the current API, may I ask the question why the way this API works is considered reasonable? A API call that fails, but still changes internal state and expects the caller to clean up the mess it not really what I would consider fool- proof API design. Is there a specific reason why it is done this way and not handled internally?
Regards, Lucas
Signed-off-by: Navid Emamdoost navid.emamdoost@gmail.com
Changes in v2:
- replace pm_runtime_put with pm_runtime_put_noidle
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a31eeff2b297..7c9f3f9ba123 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) { dev_err(gpu->dev, "Failed to enable GPU power domain\n");
return ret;
goto pm_put;
}
etnaviv_hw_identify(gpu);
@@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
fail: pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
ret = pm_runtime_get_sync(gpu->dev); if (ret < 0)
return ret;
goto pm_put;
dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW); dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
@@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) ret = 0;
pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret; @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) dev_err(gpu->dev, "recover hung GPU!\n");
if (pm_runtime_get_sync(gpu->dev) < 0)
return;
goto pm_put;
mutex_lock(&gpu->lock);
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev); }
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
if (!submit->runtime_resumed) { ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_noidle(gpu->dev); return NULL;
submit->runtime_resumed = true; }}
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n");
return NULL; }pm_runtime_put_noidle(gpu->dev);
Hi Lucas,
On Wed, Jun 17, 2020 at 4:53 AM Lucas Stach l.stach@pengutronix.de wrote:
Hi Navid,
Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs, and etnaviv_gpu_init the call to pm_runtime_get_sync increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning.
While that change is correct with the current API, may I ask the question why the way this API works is considered reasonable? A API call that fails, but still changes internal state and expects the caller to clean up the mess it not really what I would consider fool- proof API design. Is there a specific reason why it is done this way and not handled internally?
I share the same concern with you on the way this API is working now. To the best of my knowledge, there are ongoing discussions on this issue:
https://lkml.org/lkml/2020/6/14/76 https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-...
Regards, Lucas
Signed-off-by: Navid Emamdoost navid.emamdoost@gmail.com
Changes in v2: - replace pm_runtime_put with pm_runtime_put_noidle
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a31eeff2b297..7c9f3f9ba123 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) { dev_err(gpu->dev, "Failed to enable GPU power domain\n");
return ret;
goto pm_put; } etnaviv_hw_identify(gpu);
@@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
fail: pm_runtime_mark_last_busy(gpu->dev); +pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret;
@@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
ret = pm_runtime_get_sync(gpu->dev); if (ret < 0)
return ret;
goto pm_put; dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW); dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
@@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) ret = 0;
pm_runtime_mark_last_busy(gpu->dev);
+pm_put: pm_runtime_put_autosuspend(gpu->dev);
return ret;
@@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) dev_err(gpu->dev, "recover hung GPU!\n");
if (pm_runtime_get_sync(gpu->dev) < 0)
return;
goto pm_put; mutex_lock(&gpu->lock);
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev);
+pm_put: pm_runtime_put_autosuspend(gpu->dev); }
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
if (!submit->runtime_resumed) { ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_noidle(gpu->dev); return NULL;
} submit->runtime_resumed = true; }
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n");
pm_runtime_put_noidle(gpu->dev); return NULL; }
-- Navid.
On Sun, Jun 14, 2020 at 4:48 AM Andy Shevchenko andy.shevchenko@gmail.com wrote:
On Sun, Jun 14, 2020 at 9:48 AM Navid Emamdoost navid.emamdoost@gmail.com wrote:
...
if (ret < 0) {
pm_runtime_put(gpu->dev);
Please, in all your patches fix this to be _put_noidle(). We wouldn't bear the flag day of fixing these parts again. Yes, I know that *now* behaviour is the same, but calling put here is slightly inconsistent.
v2 is sent.
...
pm_runtime_put(gpu->dev);
-- With Best Regards, Andy Shevchenko
dri-devel@lists.freedesktop.org