From: Rob Clark <robdclark@chromium.org>
This adds additional snapshotting of interesting GMU buffers to the devcore dumps, adds a couple WARN_ON()s, etc. (Plus a bonus comment.)
Akhil P Oommen (1):
  drm/msm/a6xx: Capture gmu log in devcoredump

Rob Clark (6):
  drm/msm/gpu: Name GMU bos
  drm/msm/gpu: Add some WARN_ON()s
  drm/msm/gpu: Make a6xx_get_gmu_log() more generic
  drm/msm/gpu: Also snapshot GMU HFI buffer
  drm/msm/gpu: Snapshot GMU debug buffer
  drm/msm/gpu: Add a comment in a6xx_gmu_init()
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c       | 25 ++++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 95 +++++++++++++++++++++
 drivers/gpu/drm/msm/adreno/a6xx_hfi.c       | 10 +++
 drivers/gpu/drm/msm/adreno/a6xx_hfi.h       | 11 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c     |  5 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h     |  2 +
 6 files changed, 138 insertions(+), 10 deletions(-)
From: Akhil P Oommen <akhilpo@codeaurora.org>
Capture the GMU log in the devcoredump to enhance debugging.
Signed-off-by: Akhil P Oommen <akhilpo@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 41 +++++++++++++++++++++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c     |  2 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h     |  2 +
 3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 6e90209cd543..f1b1a9bffb37 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -42,6 +42,8 @@ struct a6xx_gpu_state {
 	struct a6xx_gpu_state_obj *cx_debugbus;
 	int nr_cx_debugbus;
 
+	struct msm_gpu_state_bo *gmu_log;
+
 	struct list_head objs;
 };
 
@@ -800,6 +802,30 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 		&a6xx_state->gmu_registers[2], false);
 }
 
+static void a6xx_get_gmu_log(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	struct msm_gpu_state_bo *gmu_log;
+
+	gmu_log = state_kcalloc(a6xx_state,
+		1, sizeof(*a6xx_state->gmu_log));
+	if (!gmu_log)
+		return;
+
+	gmu_log->iova = gmu->log.iova;
+	gmu_log->size = gmu->log.size;
+	gmu_log->data = kvzalloc(gmu_log->size, GFP_KERNEL);
+	if (!gmu_log->data)
+		return;
+
+	memcpy(gmu_log->data, gmu->log.virt, gmu->log.size);
+
+	a6xx_state->gmu_log = gmu_log;
+}
+
 #define A6XX_GBIF_REGLIST_SIZE  1
 static void a6xx_get_registers(struct msm_gpu *gpu,
 		struct a6xx_gpu_state *a6xx_state,
@@ -937,6 +963,8 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 
 	a6xx_get_gmu_registers(gpu, a6xx_state);
 
+	a6xx_get_gmu_log(gpu, a6xx_state);
+
 	/* If GX isn't on the rest of the data isn't going to be accessible */
 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 		return &a6xx_state->base;
@@ -978,6 +1006,9 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
 	struct a6xx_gpu_state *a6xx_state = container_of(state,
 			struct a6xx_gpu_state, base);
 
+	if (a6xx_state->gmu_log && a6xx_state->gmu_log->data)
+		kvfree(a6xx_state->gmu_log->data);
+
 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
 		kfree(obj);
 
@@ -1191,6 +1222,16 @@ void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 
 	adreno_show(gpu, state, p);
 
+	drm_puts(p, "gmu-log:\n");
+	if (a6xx_state->gmu_log) {
+		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
+
+		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
+		drm_printf(p, "    size: %d\n", gmu_log->size);
+		adreno_show_object(p, &gmu_log->data, gmu_log->size,
+				&gmu_log->encoded);
+	}
+
 	drm_puts(p, "registers:\n");
 	for (i = 0; i < a6xx_state->nr_registers; i++) {
 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 7bac86b01f30..a379f98aca54 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -630,7 +630,7 @@ static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
 }
 
 /* len is expected to be in bytes */
-static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+void adreno_show_object(struct drm_printer *p, void **ptr, int len,
 		bool *encoded)
 {
 	if (!*ptr || !len)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 225c277a6223..676230862671 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -306,6 +306,8 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state);
 
 int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
 int adreno_gpu_state_put(struct msm_gpu_state *state);
+void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+		bool *encoded);
 
 /*
  * Common helper function to initialize the default address space for arm-smmu
From: Rob Clark <robdclark@chromium.org>
This was supposed to be a relative timer, not absolute.
Fixes: 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
---
 drivers/gpu/drm/msm/msm_gpu_devfreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
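To spell out the failure mode (a minimal sketch, not the driver code): for a CLOCK_MONOTONIC hrtimer, an absolute expiry of ms_to_ktime(1) means "1ms after boot", a time that is long past, so the work fires immediately instead of 1ms from now:

	/* hypothetical CLOCK_MONOTONIC timer, illustrating the two modes: */
	hrtimer_start(&timer, ms_to_ktime(1), HRTIMER_MODE_ABS); /* expiry already in the past: fires at once */
	hrtimer_start(&timer, ms_to_ktime(1), HRTIMER_MODE_REL); /* fires 1ms from now, as intended */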
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index 43468919df61..7285041c737e 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -228,5 +228,5 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
 	struct msm_gpu_devfreq *df = &gpu->devfreq;
 
 	msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
-			       HRTIMER_MODE_ABS);
+			       HRTIMER_MODE_REL);
 }
From: Rob Clark <robdclark@chromium.org>
Looks like 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq") was badly rebased on top of efb8a170a367 ("drm/msm: Fix devfreq NULL pointer dereference on a3xx") and ended up with the NULL check in the wrong place.
Fixes: 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq")
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gpu_devfreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index 7285041c737e..1f55242bb6a1 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -203,9 +203,6 @@ static void msm_devfreq_idle_work(struct kthread_work *work)
 	struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
 	unsigned long idle_freq, target_freq = 0;
 
-	if (!df->devfreq)
-		return;
-
 	/*
 	 * Hold devfreq lock to synchronize with get_dev_status()/
 	 * target() callbacks
@@ -227,6 +224,9 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
 {
 	struct msm_gpu_devfreq *df = &gpu->devfreq;
 
+	if (!df->devfreq)
+		return;
+
 	msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
 			       HRTIMER_MODE_REL);
 }
From: Rob Clark <robdclark@chromium.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 71e52b2b2025..e1774ea342b1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1146,7 +1146,7 @@ static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu)
 }
 
 static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
-		size_t size, u64 iova)
+		size_t size, u64 iova, const char *name)
 {
 	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
 	struct drm_device *dev = a6xx_gpu->base.base.dev;
@@ -1181,6 +1181,8 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
 	bo->virt = msm_gem_get_vaddr(bo->obj);
 	bo->size = size;
 
+	msm_gem_object_set_name(bo->obj, name);
+
 	return 0;
 }
 
@@ -1515,7 +1517,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	 */
 	gmu->dummy.size = SZ_4K;
 	if (adreno_is_a660_family(adreno_gpu)) {
-		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7, 0x60400000);
+		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
+					    0x60400000, "debug");
 		if (ret)
 			goto err_memory;
 
@@ -1523,23 +1526,24 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	}
 
 	/* Allocate memory for the GMU dummy page */
-	ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size, 0x60000000);
+	ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
+				    0x60000000, "dummy");
 	if (ret)
 		goto err_memory;
 
 	if (adreno_is_a650_family(adreno_gpu)) {
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
-			SZ_16M - SZ_16K, 0x04000);
+			SZ_16M - SZ_16K, 0x04000, "icache");
 		if (ret)
 			goto err_memory;
 	} else if (adreno_is_a640_family(adreno_gpu)) {
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
-			SZ_256K - SZ_16K, 0x04000);
+			SZ_256K - SZ_16K, 0x04000, "icache");
 		if (ret)
 			goto err_memory;
 
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
-			SZ_256K - SZ_16K, 0x44000);
+			SZ_256K - SZ_16K, 0x44000, "dcache");
 		if (ret)
 			goto err_memory;
 	} else {
@@ -1547,18 +1551,18 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 		gmu->legacy = true;
 
 		/* Allocate memory for the GMU debug region */
-		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0);
+		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
 		if (ret)
 			goto err_memory;
 	}
 
 	/* Allocate memory for for the HFI queues */
-	ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0);
+	ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
 	if (ret)
 		goto err_memory;
 
 	/* Allocate memory for the GMU log region */
-	ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_4K, 0);
+	ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_4K, 0, "log");
 	if (ret)
 		goto err_memory;
From: Rob Clark <robdclark@chromium.org>
We don't expect either of these conditions to ever be true, so let's get shouty if they are.
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++
 1 file changed, 3 insertions(+)
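For context, WARN_ON() dumps a backtrace but lets execution continue, and it evaluates to the condition, so it can also gate a recovery path; a generic sketch (hypothetical ptr, not from this patch):

	if (WARN_ON(!ptr))
		return -EINVAL;	/* backtrace in dmesg, then bail out gracefully */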
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a379f98aca54..6c42cc0ebe84 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -504,6 +504,9 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	int i, count = 0;
 
+	WARN_ON(gpu->needs_hw_init);
+	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));
+
 	kref_init(&state->ref);
 
 	ktime_get_real_ts64(&state->time);
From: Rob Clark <robdclark@chromium.org>
Re-work the boost and idle clamping to use PM QoS requests instead, so they get aggregated with other requests (such as from the cooling device).
This does have the minor side effect that the devfreq sysfs min_freq/max_freq files now reflect the boost and idle clamping, as they show (despite what they are documented to show) the aggregated min/max freq. Fixing that in devfreq does not look straightforward, given that OPPs can be dynamically added and removed. However, writes to these sysfs files still behave as expected.
v2: Use 64b math to avoid potential 32b overflow
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gpu.h         | 33 +++++++---
 drivers/gpu/drm/msm/msm_gpu_devfreq.c | 87 +++++++++++++++------------
 2 files changed, 71 insertions(+), 49 deletions(-)
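The PM QoS usage below follows the standard add/update/remove request lifecycle; a condensed sketch (dev, cur_freq_hz, and factor are placeholders), including the Hz-to-kHz conversion the v2 note refers to:

	struct dev_pm_qos_request boost;
	u64 freq;

	/* register the constraint once at init; 0 = no min-freq request */
	dev_pm_qos_add_request(dev, &boost, DEV_PM_QOS_MIN_FREQUENCY, 0);

	/*
	 * On boost: use 64b math, since Hz times factor can overflow 32b,
	 * and PM QoS expects kHz while devfreq operates in Hz:
	 */
	freq = (u64)cur_freq_hz * factor;
	do_div(freq, HZ_PER_KHZ);
	dev_pm_qos_update_request(&boost, freq);

	/* on teardown: */
	dev_pm_qos_remove_request(&boost);

Because the requests are aggregated, the effective frequency range honors the strictest of all requesters (boost, idle clamp, cooling device, sysfs), which is the point of the rework.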
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 59cdd00b69d0..96d8d37dd5b7 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -87,6 +87,21 @@ struct msm_gpu_devfreq {
 	/** devfreq: devfreq instance */
 	struct devfreq *devfreq;
 
+	/**
+	 * idle_constraint:
+	 *
+	 * A PM QoS constraint to limit max freq while the GPU is idle.
+	 */
+	struct dev_pm_qos_request idle_freq;
+
+	/**
+	 * boost_constraint:
+	 *
+	 * A PM QoS constraint to boost min freq for a period of time
+	 * until the boost expires.
+	 */
+	struct dev_pm_qos_request boost_freq;
+
 	/**
 	 * busy_cycles:
 	 *
@@ -103,22 +118,19 @@ struct msm_gpu_devfreq {
 	ktime_t idle_time;
 
 	/**
-	 * idle_freq:
+	 * idle_work:
 	 *
-	 * Shadow frequency used while the GPU is idle.  From the PoV of
-	 * the devfreq governor, we are continuing to sample busyness and
-	 * adjust frequency while the GPU is idle, but we use this shadow
-	 * value as the GPU is actually clamped to minimum frequency while
-	 * it is inactive.
+	 * Used to delay clamping to idle freq on active->idle transition.
 	 */
-	unsigned long idle_freq;
+	struct msm_hrtimer_work idle_work;
 
 	/**
-	 * idle_work:
+	 * boost_work:
 	 *
-	 * Used to delay clamping to idle freq on active->idle transition.
+	 * Used to reset the boost_constraint after the boost period has
+	 * elapsed
 	 */
-	struct msm_hrtimer_work idle_work;
+	struct msm_hrtimer_work boost_work;
 };
 
 struct msm_gpu {
@@ -498,6 +510,7 @@ void msm_devfreq_init(struct msm_gpu *gpu);
 void msm_devfreq_cleanup(struct msm_gpu *gpu);
 void msm_devfreq_resume(struct msm_gpu *gpu);
 void msm_devfreq_suspend(struct msm_gpu *gpu);
+void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
 void msm_devfreq_active(struct msm_gpu *gpu);
 void msm_devfreq_idle(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index 1f55242bb6a1..11be623de2ab 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -9,6 +9,7 @@
 
 #include <linux/devfreq.h>
 #include <linux/devfreq_cooling.h>
+#include <linux/units.h>
 
 /*
  * Power Management:
@@ -22,15 +23,6 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
 
 	opp = devfreq_recommended_opp(dev, freq, flags);
 
-	/*
-	 * If the GPU is idle, devfreq is not aware, so just ignore
-	 * it's requests
-	 */
-	if (gpu->devfreq.idle_freq) {
-		gpu->devfreq.idle_freq = *freq;
-		return 0;
-	}
-
 	if (IS_ERR(opp))
 		return PTR_ERR(opp);
 
@@ -48,9 +40,6 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
 
 static unsigned long get_freq(struct msm_gpu *gpu)
 {
-	if (gpu->devfreq.idle_freq)
-		return gpu->devfreq.idle_freq;
-
 	if (gpu->funcs->gpu_get_freq)
 		return gpu->funcs->gpu_get_freq(gpu);
 
@@ -88,6 +77,7 @@ static struct devfreq_dev_profile msm_devfreq_profile = {
 	.get_cur_freq = msm_devfreq_get_cur_freq,
 };
 
+static void msm_devfreq_boost_work(struct kthread_work *work);
 static void msm_devfreq_idle_work(struct kthread_work *work);
 
 void msm_devfreq_init(struct msm_gpu *gpu)
@@ -98,6 +88,12 @@ void msm_devfreq_init(struct msm_gpu *gpu)
 	if (!gpu->funcs->gpu_busy)
 		return;
 
+	dev_pm_qos_add_request(&gpu->pdev->dev, &df->idle_freq,
+			       DEV_PM_QOS_MAX_FREQUENCY,
+			       PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	dev_pm_qos_add_request(&gpu->pdev->dev, &df->boost_freq,
+			       DEV_PM_QOS_MIN_FREQUENCY, 0);
+
 	msm_devfreq_profile.initial_freq = gpu->fast_rate;
 
 	/*
@@ -128,13 +124,19 @@ void msm_devfreq_init(struct msm_gpu *gpu)
 		gpu->cooling = NULL;
 	}
 
+	msm_hrtimer_work_init(&df->boost_work, gpu->worker, msm_devfreq_boost_work,
+			      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	msm_hrtimer_work_init(&df->idle_work, gpu->worker, msm_devfreq_idle_work,
 			      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 }
 
 void msm_devfreq_cleanup(struct msm_gpu *gpu)
 {
+	struct msm_gpu_devfreq *df = &gpu->devfreq;
+
 	devfreq_cooling_unregister(gpu->cooling);
+	dev_pm_qos_remove_request(&df->boost_freq);
+	dev_pm_qos_remove_request(&df->idle_freq);
 }
 
 void msm_devfreq_resume(struct msm_gpu *gpu)
@@ -150,12 +152,40 @@ void msm_devfreq_suspend(struct msm_gpu *gpu)
 	devfreq_suspend_device(gpu->devfreq.devfreq);
 }
 
+static void msm_devfreq_boost_work(struct kthread_work *work)
+{
+	struct msm_gpu_devfreq *df = container_of(work,
+			struct msm_gpu_devfreq, boost_work.work);
+
+	dev_pm_qos_update_request(&df->boost_freq, 0);
+}
+
+void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor)
+{
+	struct msm_gpu_devfreq *df = &gpu->devfreq;
+	uint64_t freq;
+
+	freq = get_freq(gpu);
+	freq *= factor;
+
+	/*
+	 * A nice little trap is that PM QoS operates in terms of KHz,
+	 * while devfreq operates in terms of Hz:
+	 */
+	do_div(freq, HZ_PER_KHZ);
+
+	dev_pm_qos_update_request(&df->boost_freq, freq);
+
+	msm_hrtimer_queue_work(&df->boost_work,
+			       ms_to_ktime(msm_devfreq_profile.polling_ms),
+			       HRTIMER_MODE_REL);
+}
+
 void msm_devfreq_active(struct msm_gpu *gpu)
 {
 	struct msm_gpu_devfreq *df = &gpu->devfreq;
 	struct devfreq_dev_status status;
 	unsigned int idle_time;
-	unsigned long target_freq = df->idle_freq;
 
 	if (!df->devfreq)
 		return;
 
@@ -165,12 +195,6 @@ void msm_devfreq_active(struct msm_gpu *gpu)
 	 */
 	hrtimer_cancel(&df->idle_work.timer);
 
-	/*
-	 * Hold devfreq lock to synchronize with get_dev_status()/
-	 * target() callbacks
-	 */
-	mutex_lock(&df->devfreq->lock);
-
 	idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));
 
 	/*
@@ -179,20 +203,17 @@ void msm_devfreq_active(struct msm_gpu *gpu)
 	 * the governor to ramp up the freq.. so give some boost
 	 */
 	if (idle_time > msm_devfreq_profile.polling_ms) {
-		target_freq *= 2;
+		msm_devfreq_boost(gpu, 2);
 	}
 
-	df->idle_freq = 0;
-
-	msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
+	dev_pm_qos_update_request(&df->idle_freq,
+				  PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
 
 	/*
 	 * Reset the polling interval so we aren't inconsistent
 	 * about freq vs busy/total cycles
 	 */
 	msm_devfreq_get_dev_status(&gpu->pdev->dev, &status);
-
-	mutex_unlock(&df->devfreq->lock);
 }
 
@@ -201,23 +222,11 @@ static void msm_devfreq_idle_work(struct kthread_work *work)
 	struct msm_gpu_devfreq *df = container_of(work,
 			struct msm_gpu_devfreq, idle_work.work);
 	struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
-	unsigned long idle_freq, target_freq = 0;
-
-	/*
-	 * Hold devfreq lock to synchronize with get_dev_status()/
-	 * target() callbacks
-	 */
-	mutex_lock(&df->devfreq->lock);
-
-	idle_freq = get_freq(gpu);
-
-	if (gpu->clamp_to_idle)
-		msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
 
 	df->idle_time = ktime_get();
-	df->idle_freq = idle_freq;
 
-	mutex_unlock(&df->devfreq->lock);
+	if (gpu->clamp_to_idle)
+		dev_pm_qos_update_request(&df->idle_freq, 0);
 }
 
 void msm_devfreq_idle(struct msm_gpu *gpu)
From: Rob Clark <robdclark@chromium.org>
Turn it into a thing we can use to snapshot other GMU buffers.
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 34 +++++++++------------
 1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index f1b1a9bffb37..1de103f29d25 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -802,28 +802,24 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 		&a6xx_state->gmu_registers[2], false);
 }
 
-static void a6xx_get_gmu_log(struct msm_gpu *gpu,
-		struct a6xx_gpu_state *a6xx_state)
+static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
+		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
 {
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
-	struct msm_gpu_state_bo *gmu_log;
+	struct msm_gpu_state_bo *snapshot;
 
-	gmu_log = state_kcalloc(a6xx_state,
-		1, sizeof(*a6xx_state->gmu_log));
-	if (!gmu_log)
-		return;
+	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
+	if (!snapshot)
+		return NULL;
 
-	gmu_log->iova = gmu->log.iova;
-	gmu_log->size = gmu->log.size;
-	gmu_log->data = kvzalloc(gmu_log->size, GFP_KERNEL);
-	if (!gmu_log->data)
-		return;
+	snapshot->iova = bo->iova;
+	snapshot->size = bo->size;
+	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
+	if (!snapshot->data)
+		return NULL;
 
-	memcpy(gmu_log->data, gmu->log.virt, gmu->log.size);
+	memcpy(snapshot->data, bo->virt, bo->size);
 
-	a6xx_state->gmu_log = gmu_log;
+	return snapshot;
 }
 
 #define A6XX_GBIF_REGLIST_SIZE  1
@@ -963,7 +959,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 
 	a6xx_get_gmu_registers(gpu, a6xx_state);
 
-	a6xx_get_gmu_log(gpu, a6xx_state);
+	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
 
 	/* If GX isn't on the rest of the data isn't going to be accessible */
 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
@@ -1006,7 +1002,7 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
 	struct a6xx_gpu_state *a6xx_state = container_of(state,
 			struct a6xx_gpu_state, base);
 
-	if (a6xx_state->gmu_log && a6xx_state->gmu_log->data)
+	if (a6xx_state->gmu_log)
 		kvfree(a6xx_state->gmu_log->data);
 
 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
From: Rob Clark <robdclark@chromium.org>
This also includes a history of the start index of the last 8 messages on each queue, since parsing backwards to decode recently sent HFI messages is hard(ish).
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 48 ++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/a6xx_hfi.c       | 10 +++++
 drivers/gpu/drm/msm/adreno/a6xx_hfi.h       | 11 +++++
 3 files changed, 68 insertions(+), 1 deletion(-)
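The history is a small ring buffer: the queue read/write paths record each message's start index at history_idx, and the snapshot walks the ring starting at history_idx so entries come out oldest-first. A sketch of the indexing, mirroring the code below (out[] is a placeholder for the snapshot array):

	/* producer: record where each message started */
	queue->history[(queue->history_idx++) % HFI_HISTORY_SZ] = index;

	/*
	 * consumer: history_idx now names the oldest slot, so this walk
	 * yields oldest..newest.  Unused slots were memset to 0xff, which
	 * reads back as -1 in the s32 history.  history_idx is a u8, and
	 * 256 is a multiple of HFI_HISTORY_SZ (8), so the modulo stays
	 * consistent across u8 wraparound.
	 */
	for (j = 0; j < HFI_HISTORY_SZ; j++)
		out[j] = queue->history[(j + queue->history_idx) % HFI_HISTORY_SZ];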
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 1de103f29d25..a84ba8982cb8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -43,6 +43,9 @@ struct a6xx_gpu_state {
 	int nr_cx_debugbus;
 
 	struct msm_gpu_state_bo *gmu_log;
+	struct msm_gpu_state_bo *gmu_hfi;
+
+	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
 
 	struct list_head objs;
 };
 
@@ -822,6 +825,25 @@ static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 	return snapshot;
 }
 
+static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	unsigned i, j;
+
+	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
+
+	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
+		struct a6xx_hfi_queue *queue = &gmu->queues[i];
+		for (j = 0; j < HFI_HISTORY_SZ; j++) {
+			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
+			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
+		}
+	}
+}
+
 #define A6XX_GBIF_REGLIST_SIZE  1
 static void a6xx_get_registers(struct msm_gpu *gpu,
 		struct a6xx_gpu_state *a6xx_state,
@@ -960,6 +982,9 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 	a6xx_get_gmu_registers(gpu, a6xx_state);
 
 	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
+	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
+
+	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
 
 	/* If GX isn't on the rest of the data isn't going to be accessible */
 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
@@ -1005,6 +1030,9 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
 	if (a6xx_state->gmu_log)
 		kvfree(a6xx_state->gmu_log->data);
 
+	if (a6xx_state->gmu_hfi)
+		kvfree(a6xx_state->gmu_hfi->data);
+
 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
 		kfree(obj);
 
@@ -1223,11 +1251,29 @@ void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
 
 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
-		drm_printf(p, "    size: %d\n", gmu_log->size);
+		drm_printf(p, "    size: %zu\n", gmu_log->size);
 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
 				&gmu_log->encoded);
 	}
 
+	drm_puts(p, "gmu-hfi:\n");
+	if (a6xx_state->gmu_hfi) {
+		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
+		unsigned i, j;
+
+		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
+		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
+		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
+			drm_printf(p, "    queue-history[%u]:", i);
+			for (j = 0; j < HFI_HISTORY_SZ; j++) {
+				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
+			}
+			drm_printf(p, "\n");
+		}
+		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
+				&gmu_hfi->encoded);
+	}
+
 	drm_puts(p, "registers:\n");
 	for (i = 0; i < a6xx_state->nr_registers; i++) {
 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index d4c65bf0a1b7..d73fce5fdf1f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -36,6 +36,8 @@ static int a6xx_hfi_queue_read(struct a6xx_gmu *gmu,
 
 	hdr = queue->data[index];
 
+	queue->history[(queue->history_idx++) % HFI_HISTORY_SZ] = index;
+
 	/*
 	 * If we are to assume that the GMU firmware is in fact a rational actor
 	 * and is programmed to not send us a larger response than we expect
@@ -75,6 +77,8 @@ static int a6xx_hfi_queue_write(struct a6xx_gmu *gmu,
 		return -ENOSPC;
 	}
 
+	queue->history[(queue->history_idx++) % HFI_HISTORY_SZ] = index;
+
 	for (i = 0; i < dwords; i++) {
 		queue->data[index] = data[i];
 		index = (index + 1) % header->size;
@@ -600,6 +604,9 @@ void a6xx_hfi_stop(struct a6xx_gmu *gmu)
 
 		queue->header->read_index = 0;
 		queue->header->write_index = 0;
+
+		memset(&queue->history, 0xff, sizeof(queue->history));
+		queue->history_idx = 0;
 	}
 }
 
@@ -612,6 +619,9 @@ static void a6xx_hfi_queue_init(struct a6xx_hfi_queue *queue,
 	queue->data = virt;
 	atomic_set(&queue->seqnum, 0);
 
+	memset(&queue->history, 0xff, sizeof(queue->history));
+	queue->history_idx = 0;
+
 	/* Set up the shared memory header */
 	header->iova = iova;
 	header->type = 10 << 8 | id;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
index 2bd670ca42d6..528110169398 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
@@ -33,6 +33,17 @@ struct a6xx_hfi_queue {
 	spinlock_t lock;
 	u32 *data;
 	atomic_t seqnum;
+
+	/*
+	 * Tracking for the start index of the last N messages in the
+	 * queue, for the benefit of devcore dump / crashdec (since
+	 * parsing in the reverse direction to decode the last N
+	 * messages is difficult to do and would rely on heuristics
+	 * which are not guaranteed to be correct)
+	 */
+#define HFI_HISTORY_SZ 8
+	s32 history[HFI_HISTORY_SZ];
+	u8 history_idx;
 };
 
 /* This is the outgoing queue to the GMU */
From: Rob Clark <robdclark@chromium.org>
It appears to be a GMU fw build option whether it does anything with the debug and log buffers, but if they are all zeros they won't add anything to the devcore size.
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)
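The "all zeros cost nothing" property presumably falls out of the ascii85 encoding that adreno_show_object() applies to BO contents: the kernel's helper collapses an all-zero 32-bit group into the single character 'z'. A sketch, assuming the include/linux/ascii85.h API:

	#include <linux/ascii85.h>

	char buf[ASCII85_BUFSZ];

	ascii85_encode(0x00000000, buf);	/* returns "z": one byte in the dump */
	ascii85_encode(0xdeadbeef, buf);	/* returns a five-character group */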
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index a84ba8982cb8..bdd0059a81ff 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -44,6 +44,7 @@ struct a6xx_gpu_state {
 
 	struct msm_gpu_state_bo *gmu_log;
 	struct msm_gpu_state_bo *gmu_hfi;
+	struct msm_gpu_state_bo *gmu_debug;
 
 	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
 
@@ -983,6 +984,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 
 	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
 	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
+	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
 
 	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
 
@@ -1274,6 +1276,16 @@ void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 				&gmu_hfi->encoded);
 	}
 
+	drm_puts(p, "gmu-debug:\n");
+	if (a6xx_state->gmu_debug) {
+		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
+
+		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
+		drm_printf(p, "    size: %zu\n", gmu_debug->size);
+		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
+				&gmu_debug->encoded);
+	}
+
 	drm_puts(p, "registers:\n");
 	for (i = 0; i < a6xx_state->nr_registers; i++) {
 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
From: Rob Clark <robdclark@chromium.org>
If you don't realize that adreno_is_a650_family() also encompasses the a660 family, you'd think that the debug buffer is allocated twice. Add a comment to make this more clear.
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e1774ea342b1..3e325e2a2b1b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1531,6 +1531,7 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	if (ret)
 		goto err_memory;
 
+	/* Note that a650 family also includes a660 family: */
 	if (adreno_is_a650_family(adreno_gpu)) {
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
 			SZ_16M - SZ_16K, 0x04000, "icache");
@@ -1547,6 +1548,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 		if (ret)
 			goto err_memory;
 	} else {
+		BUG_ON(adreno_is_a660_family(adreno_gpu));
+
 		/* HFI v1, has sptprac */
 		gmu->legacy = true;