Here's a series to re-add GPU reset, which I cut out of the driver in 8483d152db61c5baf5452b844ef65b96ee9a6cfb to deal with a build regression. Along the way, I found a bug in hang detection that seemed to be exacerbated by runtime PM.
My preference would be to get this in -fixes, since without GPU reset we can end up with a totally wedged desktop (and we do still have some known GPU hangs). If not, I'm fine with the first patch to -fixes and the other two to -next.
Eric Anholt (3): drm/vc4: Fix spurious GPU resets due to BO reuse. drm/vc4: Enable runtime PM. drm/vc4: Use runtime PM to power cycle the device when the GPU hangs.
drivers/gpu/drm/vc4/vc4_drv.h | 13 +++++++++-- drivers/gpu/drm/vc4/vc4_gem.c | 51 ++++++++++++++++++++++++++++++++++++------- drivers/gpu/drm/vc4/vc4_v3d.c | 48 ++++++++++++++++++++++++++++++---------- 3 files changed, 90 insertions(+), 22 deletions(-)
We were tracking the "where are the head pointers pointing" globally, so if another job reused the same BOs and execution was at the same point as last time we checked, we'd stop and trigger a reset even though the GPU had made progress.
Signed-off-by: Eric Anholt eric@anholt.net --- drivers/gpu/drm/vc4/vc4_drv.h | 6 +++++- drivers/gpu/drm/vc4/vc4_gem.c | 19 ++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index a7fea77..b4a26fd 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -92,7 +92,6 @@ struct vc4_dev { struct work_struct overflow_mem_work;
struct { - uint32_t last_ct0ca, last_ct1ca; struct timer_list timer; struct work_struct reset_work; } hangcheck; @@ -202,6 +201,11 @@ struct vc4_exec_info { /* Sequence number for this bin/render job. */ uint64_t seqno;
+ /* Last current addresses the hardware was processing when the + * hangcheck timer checked on us. + */ + uint32_t last_ct0ca, last_ct1ca; + /* Kernel-space copy of the ioctl arguments */ struct drm_vc4_submit_cl *args;
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 48ce30a..1b7adf1 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -257,10 +257,17 @@ vc4_hangcheck_elapsed(unsigned long data) struct drm_device *dev = (struct drm_device *)data; struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t ct0ca, ct1ca; + unsigned long irqflags; + struct vc4_exec_info *exec; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + exec = vc4_first_job(vc4);
/* If idle, we can stop watching for hangs. */ - if (list_empty(&vc4->job_list)) + if (!exec) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; + }
ct0ca = V3D_READ(V3D_CTNCA(0)); ct1ca = V3D_READ(V3D_CTNCA(1)); @@ -268,14 +275,16 @@ vc4_hangcheck_elapsed(unsigned long data) /* If we've made any progress in execution, rearm the timer * and wait. */ - if (ct0ca != vc4->hangcheck.last_ct0ca || - ct1ca != vc4->hangcheck.last_ct1ca) { - vc4->hangcheck.last_ct0ca = ct0ca; - vc4->hangcheck.last_ct1ca = ct1ca; + if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) { + exec->last_ct0ca = ct0ca; + exec->last_ct1ca = ct1ca; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); vc4_queue_hangcheck(dev); return; }
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags); + /* We've gone too long with no progress, reset. This has to * be done from a work struct, since resetting can sleep and * this timer hook isn't allowed to.
This may actually get us a feature that the closed driver didn't have: turning off the GPU in between rendering jobs, while the V3D device is still opened by the client.
There may be some tuning to be applied here to use autosuspend so that we don't bounce the device's power so much, but in steady-state GPU-bound rendering we keep the power on (since we keep multiple jobs outstanding) and even if we power cycle on every job we can still manage at least 680 fps.
More importantly, though, runtime PM will allow us to power off the device to do a GPU reset.
Signed-off-by: Eric Anholt eric@anholt.net ---
I'm not 100% sure about the closed driver comparison -- the equivalent of the kernel side for the closed driver keeps the power on as far as I can see, but their userspace equivalent may open/close the kernel more frequenctly than I expect.
drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 10 ++++++++++ drivers/gpu/drm/vc4/vc4_v3d.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index b4a26fd..b65e785 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -141,6 +141,7 @@ struct vc4_seqno_cb { };
struct vc4_v3d { + struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; }; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 1b7adf1..440ad03 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -23,6 +23,7 @@
#include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/device.h> #include <linux/io.h>
@@ -626,6 +627,7 @@ fail: static void vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned i;
/* Need the struct lock for drm_gem_object_unreference(). */ @@ -644,6 +646,8 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) } mutex_unlock(&dev->struct_mutex);
+ pm_runtime_put(&vc4->v3d->pdev->dev); + kfree(exec); }
@@ -794,6 +798,12 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -ENOMEM; }
+ ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret < 0) { + kfree(exec); + return ret; + } + exec->args = args; INIT_LIST_HEAD(&exec->unref_list);
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 314ff71..930851f 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -17,6 +17,7 @@ */
#include "linux/component.h" +#include "linux/pm_runtime.h" #include "vc4_drv.h" #include "vc4_regs.h"
@@ -167,6 +168,29 @@ static void vc4_v3d_init_hw(struct drm_device *dev) V3D_WRITE(V3D_VPMBASE, 0); }
+#ifdef CONFIG_PM_SLEEP +static int vc4_v3d_runtime_suspend(struct device *dev) +{ + struct vc4_v3d *v3d = dev_get_drvdata(dev); + struct vc4_dev *vc4 = v3d->vc4; + + vc4_irq_uninstall(vc4->dev); + + return 0; +} + +static int vc4_v3d_runtime_resume(struct device *dev) +{ + struct vc4_v3d *v3d = dev_get_drvdata(dev); + struct vc4_dev *vc4 = v3d->vc4; + + vc4_v3d_init_hw(vc4->dev); + vc4_irq_postinstall(vc4->dev); + + return 0; +} +#endif + static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -179,6 +203,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) if (!v3d) return -ENOMEM;
+ dev_set_drvdata(dev, v3d); + v3d->pdev = pdev;
v3d->regs = vc4_ioremap_regs(pdev, 0); @@ -186,6 +212,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(v3d->regs);
vc4->v3d = v3d; + v3d->vc4 = vc4;
if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", @@ -207,6 +234,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return ret; }
+ pm_runtime_enable(dev); + return 0; }
@@ -216,6 +245,8 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master, struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm);
+ pm_runtime_disable(dev); + drm_irq_uninstall(drm);
/* Disable the binner's overflow memory address, so the next @@ -228,6 +259,10 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master, vc4->v3d = NULL; }
+static const struct dev_pm_ops vc4_v3d_pm_ops = { + SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL) +}; + static const struct component_ops vc4_v3d_ops = { .bind = vc4_v3d_bind, .unbind = vc4_v3d_unbind, @@ -255,5 +290,6 @@ struct platform_driver vc4_v3d_driver = { .driver = { .name = "vc4_v3d", .of_match_table = vc4_v3d_dt_match, + .pm = &vc4_v3d_pm_ops, }, };
Eric Anholt eric@anholt.net writes:
+#ifdef CONFIG_PM_SLEEP +static int vc4_v3d_runtime_suspend(struct device *dev) +{
- struct vc4_v3d *v3d = dev_get_drvdata(dev);
- struct vc4_dev *vc4 = v3d->vc4;
- vc4_irq_uninstall(vc4->dev);
- return 0;
+}
+static int vc4_v3d_runtime_resume(struct device *dev) +{
- struct vc4_v3d *v3d = dev_get_drvdata(dev);
- struct vc4_dev *vc4 = v3d->vc4;
- vc4_v3d_init_hw(vc4->dev);
- vc4_irq_postinstall(vc4->dev);
- return 0;
+} +#endif
kbuild test robot caught that this #ifdef should have been CONFIG_PM, not CONFIG_PM_SLEEP. Fixed in v2.
This gets us functional GPU reset again, like we had until a refactor at merge time. Tested with a little patch to stuff in a broken binner job every 100 frames.
Signed-off-by: Eric Anholt eric@anholt.net --- drivers/gpu/drm/vc4/vc4_drv.h | 6 +++++- drivers/gpu/drm/vc4/vc4_gem.c | 26 +++++++++++++++++++++----- drivers/gpu/drm/vc4/vc4_v3d.c | 12 ------------ 3 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index b65e785..83db0b7 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -91,6 +91,11 @@ struct vc4_dev { struct vc4_bo *overflow_mem; struct work_struct overflow_mem_work;
+ int power_refcount; + + /* Mutex controlling the power refcount. */ + struct mutex power_lock; + struct { struct timer_list timer; struct work_struct reset_work; @@ -449,7 +454,6 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, extern struct platform_driver vc4_v3d_driver; int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); -int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
/* vc4_validate.c */ int diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 440ad03..5df46d6 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -229,8 +229,16 @@ vc4_reset(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev);
DRM_INFO("Resetting GPU.\n"); - vc4_v3d_set_power(vc4, false); - vc4_v3d_set_power(vc4, true); + + mutex_lock(&vc4->power_lock); + if (vc4->power_refcount) { + /* Power the device off and back on the by dropping the + * reference on runtime PM. + */ + pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev); + pm_runtime_get_sync(&vc4->v3d->pdev->dev); + } + mutex_unlock(&vc4->power_lock);
vc4_irq_reset(dev);
@@ -646,7 +654,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) } mutex_unlock(&dev->struct_mutex);
- pm_runtime_put(&vc4->v3d->pdev->dev); + mutex_lock(&vc4->power_lock); + if (--vc4->power_refcount == 0) + pm_runtime_put(&vc4->v3d->pdev->dev); + mutex_unlock(&vc4->power_lock);
kfree(exec); } @@ -785,7 +796,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; - int ret; + int ret = 0;
if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { DRM_ERROR("Unknown flags: 0x%02x\n", args->flags); @@ -798,7 +809,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -ENOMEM; }
- ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + mutex_lock(&vc4->power_lock); + if (vc4->power_refcount++ == 0) + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + mutex_unlock(&vc4->power_lock); if (ret < 0) { kfree(exec); return ret; @@ -858,6 +872,8 @@ vc4_gem_init(struct drm_device *dev) (unsigned long)dev);
INIT_WORK(&vc4->job_done_work, vc4_job_done_work); + + mutex_init(&vc4->power_lock); }
void diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 930851f..7b11e21 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -145,18 +145,6 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) } #endif /* CONFIG_DEBUG_FS */
-int -vc4_v3d_set_power(struct vc4_dev *vc4, bool on) -{ - /* XXX: This interface is needed for GPU reset, and the way to - * do it is to turn our power domain off and back on. We - * can't just reset from within the driver, because the reset - * bits are in the power domain's register area, and get set - * during the poweron process. - */ - return 0; -} - static void vc4_v3d_init_hw(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev);
dri-devel@lists.freedesktop.org