On 22/06/2022 15:36, Adrián Larumbe wrote:
In the event of a job timeout, debug dump information will be written into /sys/class/devcoredump.
Inspired by etnaviv's similar feature.
Signed-off-by: Adrián Larumbe adrian.larumbe@collabora.com
Looks pretty good, a few comments below.
drivers/gpu/drm/panfrost/Kconfig | 1 + drivers/gpu/drm/panfrost/Makefile | 3 +- drivers/gpu/drm/panfrost/panfrost_dump.c | 249 +++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_dump.h | 12 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 3 + include/uapi/drm/panfrost_drm.h | 47 +++++ 6 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.h
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig index 86cdc0ce79e6..079600328be1 100644 --- a/drivers/gpu/drm/panfrost/Kconfig +++ b/drivers/gpu/drm/panfrost/Kconfig @@ -11,6 +11,7 @@ config DRM_PANFROST select DRM_GEM_SHMEM_HELPER select PM_DEVFREQ select DEVFREQ_GOV_SIMPLE_ONDEMAND
- select WANT_DEV_COREDUMP help DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile index b71935862417..7da2b3f02ed9 100644 --- a/drivers/gpu/drm/panfrost/Makefile +++ b/drivers/gpu/drm/panfrost/Makefile @@ -9,6 +9,7 @@ panfrost-y := \ panfrost_gpu.o \ panfrost_job.o \ panfrost_mmu.o \
- panfrost_perfcnt.o
- panfrost_perfcnt.o \
- panfrost_dump.o
obj-$(CONFIG_DRM_PANFROST) += panfrost.o diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c new file mode 100644 index 000000000000..a710ed7bcefa --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_dump.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2021 Collabora ltd. */
+#include <linux/err.h> +#include <linux/device.h> +#include <linux/devcoredump.h> +#include <linux/moduleparam.h> +#include <linux/iosys-map.h> +#include <drm/panfrost_drm.h> +#include <drm/drm_device.h>
+#include "panfrost_job.h" +#include "panfrost_gem.h" +#include "panfrost_regs.h" +#include "panfrost_dump.h" +#include "panfrost_device.h"
+static bool panfrost_dump_core = true; +module_param_named(dump_core, panfrost_dump_core, bool, 0600);
+struct panfrost_dump_iterator {
- void *start;
- struct panfrost_dump_object_header *hdr;
- void *data;
+};
+static const unsigned short panfrost_dump_registers[] = {
- SHADER_READY_LO,
- SHADER_READY_HI,
- TILER_READY_LO,
- TILER_READY_HI,
- L2_READY_LO,
- L2_READY_HI,
- JOB_INT_MASK,
- JOB_INT_STAT,
- JS_HEAD_LO(0),
- JS_HEAD_HI(0),
- JS_TAIL_LO(0),
- JS_TAIL_HI(0),
- JS_AFFINITY_LO(0),
- JS_AFFINITY_HI(0),
- JS_CONFIG(0),
- JS_STATUS(0),
- JS_HEAD_NEXT_LO(0),
- JS_HEAD_NEXT_HI(0),
- JS_AFFINITY_NEXT_LO(0),
- JS_AFFINITY_NEXT_HI(0),
- JS_CONFIG_NEXT(0),
- MMU_INT_MASK,
- MMU_INT_STAT,
- AS_TRANSTAB_LO(0),
- AS_TRANSTAB_HI(0),
- AS_MEMATTR_LO(0),
- AS_MEMATTR_HI(0),
- AS_FAULTSTATUS(0),
- AS_FAULTADDRESS_LO(0),
- AS_FAULTADDRESS_HI(0),
- AS_STATUS(0),
+};
+static void panfrost_core_dump_header(struct panfrost_dump_iterator *iter,
- u32 type, void *data_end)
+{
- struct panfrost_dump_object_header *hdr = iter->hdr;
- hdr->magic = cpu_to_le32(PANFROSTDUMP_MAGIC);
- hdr->type = cpu_to_le32(type);
- hdr->file_offset = cpu_to_le32(iter->data - iter->start);
- hdr->file_size = cpu_to_le32(data_end - iter->data);
- iter->hdr++;
- iter->data += le32_to_cpu(hdr->file_size);
+}
+static void +panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
struct panfrost_device *pfdev,
u32 as_nr, int slot)
+{
- struct panfrost_dump_registers *dumpreg = iter->data;
- unsigned int i;
- for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
unsigned int js_as_offset = 0;
unsigned int reg;
if (panfrost_dump_registers[i] >= JS_HEAD_LO(0) &&
panfrost_dump_registers[i] <= JS_CONFIG_NEXT(0))
It would be good to use something more generic than specific registers in case the list of registers is ever changed. We have JS_BASE already which would work for the lower end. We seem to be missing the equivalent MMU_BASE #define currently. The upper end is base+stride, so for JS we have:
if (panfrost_dump_registers[i] >= JS_BASE && panfrost_dump_registers[i] < JS_BASE + JS_SLOT_STRIDE)
js_as_offset = slot * JS_SLOT_STRIDE;
else if (panfrost_dump_registers[i] >= AS_TRANSTAB_LO(0) &&
panfrost_dump_registers[i] <= AS_STATUS(0))
js_as_offset = (as_nr << MMU_AS_SHIFT);
reg = panfrost_dump_registers[i] + js_as_offset;
dumpreg->reg = cpu_to_le32(reg);
dumpreg->value = cpu_to_le32(gpu_read(pfdev, reg));
- }
- panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
+}
+void panfrost_core_dump(struct panfrost_job *job) +{
- struct panfrost_device *pfdev = job->pfdev;
- struct panfrost_dump_iterator iter;
- struct drm_gem_object *dbo;
- unsigned int n_obj, n_bomap_pages;
- __le64 *bomap, *bomap_start;
- size_t file_size;
- u32 as_nr;
- int slot;
- int ret, i;
- as_nr = job->mmu->as;
- slot = panfrost_job_get_slot(job);
- /* Only catch the first event, or when manually re-armed */
- if (!panfrost_dump_core)
return;
- panfrost_dump_core = false;
- /* At least, we dump registers and end marker */
- n_obj = 2;
- n_bomap_pages = 0;
- file_size = ARRAY_SIZE(panfrost_dump_registers) *
sizeof(struct panfrost_dump_registers);
- /* Add in the active buffer objects */
- for (i = 0; i < job->bo_count; i++) {
/*
* Even though the CPU could be configured to use 16K or 64K pages, this
* is a very unusual situation for most kernel setups on SoCs that have
* a Panfrost device. Also many places across the driver make the somewhat
* arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
* so let's have a sanity check to ensure that's always the case
*/
WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
This warn needs to go after the assignment below - I just spent a few minutes tracking down why I was getting a NULL pointer dereference. Sadly my GCC doesn't seem to be able to warn about this uninitialised use :(
dbo = job->bos[i];
file_size += dbo->size;
n_bomap_pages += dbo->size >> PAGE_SHIFT;
n_obj++;
- }
- /* If we have any buffer objects, add a bomap object */
- if (n_bomap_pages) {
file_size += n_bomap_pages * sizeof(*bomap);
n_obj++;
- }
- /* Add the size of the headers */
- file_size += sizeof(*iter.hdr) * n_obj;
- /*
* Allocate the file in vmalloc memory, it's likely to be big.
* The reason behind these GFP flags is that we don't want to trigger the
* OOM killer in the event that not enough memory could be found for our
* dump file. We also don't want the allocator to do any error reporting,
* as the right behaviour is failing gracefully if a big enough buffer
* could not be allocated.
*/
- iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
__GFP_NORETRY);
- if (!iter.start) {
dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
return;
- }
- /* Point the data member after the headers */
- iter.hdr = iter.start;
- iter.data = &iter.hdr[n_obj];
- memset(iter.hdr, 0, iter.data - iter.start);
- /*
* For now, we write the job identifier in the register dump header,
* so that we can decode the entire dump later with pandecode
*/
- iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
- iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
- iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
- iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
- iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
- panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
- /* Reserve space for the bomap */
- if (job->bo_count) {
bomap_start = bomap = iter.data;
memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
bomap + n_bomap_pages);
- }
- for (i = 0; i < job->bo_count; i++) {
struct iosys_map map;
struct panfrost_gem_mapping *mapping;
struct panfrost_gem_object *bo;
struct sg_page_iter page_iter;
void *vaddr;
bo = to_panfrost_bo(job->bos[i]);
mapping = job->mappings[i];
if (!bo->base.sgt) {
dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
iter.hdr->bomap.valid = 0;
goto dump_header;
}
ret = drm_gem_shmem_vmap(&bo->base, &map);
if (ret) {
dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
iter.hdr->bomap.valid = 0;
goto dump_header;
}
WARN_ON(!mapping->active);
iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
struct page *page = sg_page_iter_page(&page_iter);
if (!IS_ERR(page)) {
*bomap++ = cpu_to_le64(page_to_phys(page));
} else {
dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
*bomap++ = ~cpu_to_le64(0);
}
}
iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
vaddr = map.vaddr;
memcpy(iter.data, vaddr, bo->base.base.size);
drm_gem_shmem_vunmap(&bo->base, &map);
iter.hdr->bomap.valid = cpu_to_le64(1);
'valid' is only 32 bit so this should be cpu_to_le32(1).
Steve
+dump_header: panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BO, iter.data +
bo->base.base.size);
- }
- panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_TRAILER, iter.data);
- dev_coredumpv(pfdev->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
+} diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.h b/drivers/gpu/drm/panfrost/panfrost_dump.h new file mode 100644 index 000000000000..7d9bcefa5346 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_dump.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*
- Copyright 2021 Collabora ltd.
- */
+#ifndef PANFROST_DUMP_H +#define PANFROST_DUMP_H
+struct panfrost_job; +void panfrost_core_dump(struct panfrost_job *job);
+#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 7c4208476fbd..dbc597ab46fb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -20,6 +20,7 @@ #include "panfrost_regs.h" #include "panfrost_gpu.h" #include "panfrost_mmu.h" +#include "panfrost_dump.h"
#define JOB_TIMEOUT_MS 500
@@ -727,6 +728,8 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job job_read(pfdev, JS_TAIL_LO(js)), sched_job);
- panfrost_core_dump(job);
- atomic_set(&pfdev->reset.pending, 1); panfrost_reset(pfdev, sched_job);
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 9e40277d8185..eac87310b348 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -224,6 +224,53 @@ struct drm_panfrost_madvise { __u32 retained; /* out, whether backing store still exists */ };
+/* Definitions for coredump decoding in user space */ +#define PANFROSTDUMP_MAJOR 1 +#define PANFROSTDUMP_MINOR 0
+#define PANFROSTDUMP_MAGIC 0x464E4150 /* PANF */
+#define PANFROSTDUMP_BUF_REG 0 +#define PANFROSTDUMP_BUF_BOMAP (PANFROSTDUMP_BUF_REG + 1) +#define PANFROSTDUMP_BUF_BO (PANFROSTDUMP_BUF_BOMAP + 1) +#define PANFROSTDUMP_BUF_TRAILER (PANFROSTDUMP_BUF_BO + 1)
+struct panfrost_dump_object_header {
- __le32 magic;
- __le32 type;
- __le32 file_size;
- __le32 file_offset;
- union {
struct pan_reg_hdr {
__le64 jc;
__le32 gpu_id;
__le32 major;
__le32 minor;
__le64 nbos;
} reghdr;
struct pan_bomap_hdr {
__le32 valid;
__le64 iova;
__le32 data[2];
} bomap;
/*
* Force same size in case we want to expand the header
* with new fields and also keep it 512-byte aligned
*/
__le32 sizer[496];
- };
+};
+/* Registers object, an array of these */ +struct panfrost_dump_registers {
- __le32 reg;
- __le32 value;
+};
#if defined(__cplusplus) } #endif