diff mbox series

[v3,1/1] drm/panfrost: Add support for devcoredump

Message ID 20220622015420.1130814-2-adrian.larumbe@collabora.com (mailing list archive)
State New, archived
Headers show
Series devcoredump support for Panfrost GPU driver | expand

Commit Message

Adrián Martínez Larumbe June 22, 2022, 1:54 a.m. UTC
In the event of a job timeout, debug dump information will be written into
/sys/class/devcoredump.

Inspired by etnaviv's similar feature.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/Kconfig         |   1 +
 drivers/gpu/drm/panfrost/Makefile        |   3 +-
 drivers/gpu/drm/panfrost/panfrost_dump.c | 243 +++++++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_dump.h |  12 ++
 drivers/gpu/drm/panfrost/panfrost_job.c  |   3 +
 drivers/gpu/drm/panfrost/panfrost_regs.h |  39 ++--
 include/uapi/drm/panfrost_drm.h          |  47 +++++
 7 files changed, 329 insertions(+), 19 deletions(-)
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.c
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.h

Comments

Alyssa Rosenzweig June 22, 2022, 12:17 p.m. UTC | #1
> +			js_as_offset = slot * 0x80;

JS_SLOT_STRIDE

> +	slot = panfrost_job_get_slot(job);
> +	slot = slot ? slot : 0;

`slot = slot ? slot : 0` is a no-op. Delete the line.

> +			if (!IS_ERR(page))
> +				*bomap++ = cpu_to_le64(page_to_phys(page));
> +			else {
> +				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
> +				*bomap++ = ~cpu_to_le64(0);
> +			}
> +		}

Nit: because you have { braces } around half the if, please add
{ braces } around the other half for consistency.

---

As a general note, I'd appreciate breaking out the panfrost_regs.h
changes into a separate patch, as they are a logically separate clean
up to make room for this patch. Thanks.
kernel test robot June 22, 2022, 12:29 p.m. UTC | #2
Hi "Adrián,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm/drm-next]
[also build test WARNING on linus/master v5.19-rc3 next-20220622]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Adri-n-Larumbe/devcoredump-support-for-Panfrost-GPU-driver/20220622-095645
base:   git://anongit.freedesktop.org/drm/drm drm-next
config: s390-buildonly-randconfig-r004-20220622 (https://download.01.org/0day-ci/archive/20220622/202206222000.JafXnPo9-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 8b8d126598ce7bd5243da7f94f69fa1104288bee)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install s390 cross compiling tool for clang build
        # apt-get install binutils-s390x-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/966f660f30dc5a9ff854e61d189b81e92f3453fa
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Adri-n-Larumbe/devcoredump-support-for-Panfrost-GPU-driver/20220622-095645
        git checkout 966f660f30dc5a9ff854e61d189b81e92f3453fa
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash drivers/gpu/drm/amd/display/dc/ drivers/gpu/drm/panfrost/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/panfrost/panfrost_dump.c:6:
   In file included from include/linux/devcoredump.h:12:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
                                                             ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
   #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
                                                        ^
   In file included from drivers/gpu/drm/panfrost/panfrost_dump.c:6:
   In file included from include/linux/devcoredump.h:12:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
                                                             ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
   #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
                                                        ^
   In file included from drivers/gpu/drm/panfrost/panfrost_dump.c:6:
   In file included from include/linux/devcoredump.h:12:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:609:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsb(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:617:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsw(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:625:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsl(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:634:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesb(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:643:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesw(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:652:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesl(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
>> drivers/gpu/drm/panfrost/panfrost_dump.c:235:27: warning: implicit conversion from '__le64' (aka 'unsigned long long') to '__le32' (aka 'unsigned int') changes value from 72057594037927936 to 0 [-Wconstant-conversion]
                   iter.hdr->bomap.valid = cpu_to_le64(1);
                                         ~ ^~~~~~~~~~~~~~
   include/linux/byteorder/generic.h:86:21: note: expanded from macro 'cpu_to_le64'
   #define cpu_to_le64 __cpu_to_le64
                       ^
   include/uapi/linux/byteorder/big_endian.h:32:27: note: expanded from macro '__cpu_to_le64'
   #define __cpu_to_le64(x) ((__force __le64)__swab64((x)))
                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   13 warnings generated.


vim +235 drivers/gpu/drm/panfrost/panfrost_dump.c

   102	
   103	void panfrost_core_dump(struct panfrost_job *job)
   104	{
   105		struct panfrost_device *pfdev = job->pfdev;
   106		struct panfrost_dump_iterator iter;
   107		struct drm_gem_object *dbo;
   108		unsigned int n_obj, n_bomap_pages;
   109		__le64 *bomap, *bomap_start;
   110		size_t file_size;
   111		u32 as_nr;
   112		int slot;
   113		int ret, i;
   114	
   115		as_nr = job->mmu->as;
   116		slot = panfrost_job_get_slot(job);
   117		slot = slot ? slot : 0;
   118	
   119		/* Only catch the first event, or when manually re-armed */
   120		if (!panfrost_dump_core)
   121			return;
   122		panfrost_dump_core = false;
   123	
   124		/* At least, we dump registers and end marker */
   125		n_obj = 2;
   126		n_bomap_pages = 0;
   127		file_size = ARRAY_SIZE(panfrost_dump_registers) *
   128				sizeof(struct panfrost_dump_registers);
   129	
   130		/* Add in the active buffer objects */
   131		for (i = 0; i < job->bo_count; i++) {
   132			/*
   133			 * Even though the CPU could be configured to use 16K or 64K pages, this
   134			 * is a very unusual situation for most kernel setups on SoCs that have
   135			 * a Panfrost device. Also many places across the driver make the somewhat
   136			 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
   137			 * so let's have a sanity check to ensure that's always the case
   138			 */
   139			WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
   140	
   141			dbo = job->bos[i];
   142			file_size += dbo->size;
   143			n_bomap_pages += dbo->size >> PAGE_SHIFT;
   144			n_obj++;
   145		}
   146	
   147		/* If we have any buffer objects, add a bomap object */
   148		if (n_bomap_pages) {
   149			file_size += n_bomap_pages * sizeof(*bomap);
   150			n_obj++;
   151		}
   152	
   153		/* Add the size of the headers */
   154		file_size += sizeof(*iter.hdr) * n_obj;
   155	
   156		/* Allocate the file in vmalloc memory, it's likely to be big */
   157		iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
   158				__GFP_NORETRY);
   159		if (!iter.start) {
   160			dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
   161			return;
   162		}
   163	
   164		/* Point the data member after the headers */
   165		iter.hdr = iter.start;
   166		iter.data = &iter.hdr[n_obj];
   167	
   168		memset(iter.hdr, 0, iter.data - iter.start);
   169	
   170		/*
   171		 * For now, we write the job identifier in the register dump header,
   172		 * so that we can decode the entire dump later with pandecode
   173		 */
   174		iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
   175		iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
   176		iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
   177		iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
   178		iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
   179	
   180		panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
   181	
   182		/* Reserve space for the bomap */
   183		if (job->bo_count) {
   184			bomap_start = bomap = iter.data;
   185			memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
   186			panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
   187						  bomap + n_bomap_pages);
   188		}
   189	
   190		for (i = 0; i < job->bo_count; i++) {
   191			struct iosys_map map;
   192			struct panfrost_gem_mapping *mapping;
   193			struct panfrost_gem_object *bo;
   194			struct sg_page_iter page_iter;
   195			void *vaddr;
   196	
   197			bo = to_panfrost_bo(job->bos[i]);
   198			mapping = job->mappings[i];
   199	
   200			if (!bo->base.sgt) {
   201				dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
   202				iter.hdr->bomap.valid = 0;
   203				goto dump_header;
   204			}
   205	
   206			ret = drm_gem_shmem_vmap(&bo->base, &map);
   207			if (ret) {
   208				dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
   209				iter.hdr->bomap.valid = 0;
   210				goto dump_header;
   211			}
   212	
   213			WARN_ON(!mapping->active);
   214	
   215			iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
   216	
   217			for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
   218				struct page *page = sg_page_iter_page(&page_iter);
   219	
   220				if (!IS_ERR(page))
   221					*bomap++ = cpu_to_le64(page_to_phys(page));
   222				else {
   223					dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
   224					*bomap++ = ~cpu_to_le64(0);
   225				}
   226			}
   227	
   228			iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
   229	
   230			vaddr = map.vaddr;
   231			memcpy(iter.data, vaddr, bo->base.base.size);
   232	
   233			drm_gem_shmem_vunmap(&bo->base, &map);
   234	
 > 235			iter.hdr->bomap.valid = cpu_to_le64(1);
   236
Adrián Martínez Larumbe June 22, 2022, 2 p.m. UTC | #3
On 22.06.2022 08:17, Alyssa Rosenzweig wrote:
>> +			js_as_offset = slot * 0x80;
>
>JS_SLOT_STRIDE

Sorry about this blunder.

>> +	slot = panfrost_job_get_slot(job);
>> +	slot = slot ? slot : 0;
>
>`slot = slot ? slot : 0` is a no-op. Delete the line.

I think what I meant here was 'slot = (slot >= 0) ? slot : 0;' but for some
reason I blundered again. The point of this was ensuring the slot value wouldn't
end up wrapping about the maximum unsigned integer value when using it as an
array offset, in the off-chance that panfrost_job_get_slot() ever returned a
negative value.

In v4 I've instead rewritten this as a sanity check:

WARN_ON(slot < 0);

Although perhaps in the future panfrost_job_get_slot should return an unsigned
integer instead?

>> +			if (!IS_ERR(page))
>> +				*bomap++ = cpu_to_le64(page_to_phys(page));
>> +			else {
>> +				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
>> +				*bomap++ = ~cpu_to_le64(0);
>> +			}
>> +		}
>
>Nit: because you have { braces } around half the if, please add
>{ braces } around the other half for consistency.

I thought checkpatch.pl would complain about braces wrapping a single if
statement, but apparently it's fine with it in this case.

>---
>
>As a general note, I'd appreciate breaking out the panfrost_regs.h
>changes into a separate patch, as they are a logically separate clean
>up to make room for this patch. Thanks.

Done in v4.

Cheers,
Adrian
Alyssa Rosenzweig June 22, 2022, 2:16 p.m. UTC | #4
> Sorry about this blunder.
> 
> >> +	slot = panfrost_job_get_slot(job);
> >> +	slot = slot ? slot : 0;
> >
> >`slot = slot ? slot : 0` is a no-op. Delete the line.
> 
> I think what I meant here was 'slot = (slot >= 0) ? slot : 0;' but for some
> reason I blundered again. The point of this was ensuring the slot value wouldn't
> end up wrapping about the maximum unsigned integer value when using it as an
> array offset, in the off-chance that panfrost_job_get_slot() ever returned a
> negative value.
> 
> In v4 I've instead rewritten this as a sanity check:
> 
> WARN_ON(slot < 0);

No, this doesn't make sense. There at most 3 job slots -- 0, 1, and 2.

> Although perhaps in the future panfrost_job_get_slot should return an unsigned
> integer instead?

Sure. Kernel style doesn't seem big on unsigned, if this were
mesa it would return unsigned. Returning u8 or u32 seems reasonable at
any rate.

> >As a general note, I'd appreciate breaking out the panfrost_regs.h
> >changes into a separate patch, as they are a logically separate clean
> >up to make room for this patch. Thanks.
> 
> Done in v4.

Thanks!

Alyssa
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index 86cdc0ce79e6..079600328be1 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -11,6 +11,7 @@  config DRM_PANFROST
 	select DRM_GEM_SHMEM_HELPER
 	select PM_DEVFREQ
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select WANT_DEV_COREDUMP
 	help
 	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
 	  Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index b71935862417..7da2b3f02ed9 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -9,6 +9,7 @@  panfrost-y := \
 	panfrost_gpu.o \
 	panfrost_job.o \
 	panfrost_mmu.o \
-	panfrost_perfcnt.o
+	panfrost_perfcnt.o \
+	panfrost_dump.o
 
 obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c
new file mode 100644
index 000000000000..43d3da45735c
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.c
@@ -0,0 +1,243 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2021 Collabora ltd. */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/devcoredump.h>
+#include <linux/moduleparam.h>
+#include <linux/iosys-map.h>
+#include <drm/panfrost_drm.h>
+#include <drm/drm_device.h>
+
+#include "panfrost_job.h"
+#include "panfrost_gem.h"
+#include "panfrost_regs.h"
+#include "panfrost_dump.h"
+#include "panfrost_device.h"
+
+static bool panfrost_dump_core = true;
+module_param_named(dump_core, panfrost_dump_core, bool, 0600);
+
+struct panfrost_dump_iterator {
+	void *start;
+	struct panfrost_dump_object_header *hdr;
+	void *data;
+};
+
+static const unsigned short panfrost_dump_registers[] = {
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI,
+	JOB_INT_MASK,
+	JOB_INT_STAT,
+	JS_HEAD_LO(0),
+	JS_HEAD_HI(0),
+	JS_TAIL_LO(0),
+	JS_TAIL_HI(0),
+	JS_AFFINITY_LO(0),
+	JS_AFFINITY_HI(0),
+	JS_CONFIG(0),
+	JS_STATUS(0),
+	JS_HEAD_NEXT_LO(0),
+	JS_HEAD_NEXT_HI(0),
+	JS_AFFINITY_NEXT_LO(0),
+	JS_AFFINITY_NEXT_HI(0),
+	JS_CONFIG_NEXT(0),
+	MMU_INT_MASK,
+	MMU_INT_STAT,
+	AS_TRANSTAB_LO(0),
+	AS_TRANSTAB_HI(0),
+	AS_MEMATTR_LO(0),
+	AS_MEMATTR_HI(0),
+	AS_FAULTSTATUS(0),
+	AS_FAULTADDRESS_LO(0),
+	AS_FAULTADDRESS_HI(0),
+	AS_STATUS(0),
+};
+
+static void panfrost_core_dump_header(struct panfrost_dump_iterator *iter,
+	u32 type, void *data_end)
+{
+	struct panfrost_dump_object_header *hdr = iter->hdr;
+
+	hdr->magic = cpu_to_le32(PANFROSTDUMP_MAGIC);
+	hdr->type = cpu_to_le32(type);
+	hdr->file_offset = cpu_to_le32(iter->data - iter->start);
+	hdr->file_size = cpu_to_le32(data_end - iter->data);
+
+	iter->hdr++;
+	iter->data += le32_to_cpu(hdr->file_size);
+}
+
+static void
+panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
+			     struct panfrost_device *pfdev,
+			     u32 as_nr, int slot)
+{
+	struct panfrost_dump_registers *dumpreg = iter->data;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
+		unsigned int js_as_offset = 0;
+		unsigned int reg;
+
+		if (panfrost_dump_registers[i] >= JS_HEAD_LO(0) &&
+		    panfrost_dump_registers[i] <= JS_CONFIG_NEXT(0))
+			js_as_offset = slot * 0x80;
+		else if (panfrost_dump_registers[i] >= AS_TRANSTAB_LO(0) &&
+			 panfrost_dump_registers[i] <= AS_STATUS(0))
+			js_as_offset = (as_nr << MMU_AS_SHIFT);
+
+		reg = panfrost_dump_registers[i] + js_as_offset;
+
+		dumpreg->reg = cpu_to_le32(reg);
+		dumpreg->value = cpu_to_le32(gpu_read(pfdev, reg));
+	}
+
+	panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
+}
+
+void panfrost_core_dump(struct panfrost_job *job)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	struct panfrost_dump_iterator iter;
+	struct drm_gem_object *dbo;
+	unsigned int n_obj, n_bomap_pages;
+	__le64 *bomap, *bomap_start;
+	size_t file_size;
+	u32 as_nr;
+	int slot;
+	int ret, i;
+
+	as_nr = job->mmu->as;
+	slot = panfrost_job_get_slot(job);
+	slot = slot ? slot : 0;
+
+	/* Only catch the first event, or when manually re-armed */
+	if (!panfrost_dump_core)
+		return;
+	panfrost_dump_core = false;
+
+	/* At least, we dump registers and end marker */
+	n_obj = 2;
+	n_bomap_pages = 0;
+	file_size = ARRAY_SIZE(panfrost_dump_registers) *
+			sizeof(struct panfrost_dump_registers);
+
+	/* Add in the active buffer objects */
+	for (i = 0; i < job->bo_count; i++) {
+		/*
+		 * Even though the CPU could be configured to use 16K or 64K pages, this
+		 * is a very unusual situation for most kernel setups on SoCs that have
+		 * a Panfrost device. Also many places across the driver make the somewhat
+		 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
+		 * so let's have a sanity check to ensure that's always the case
+		 */
+		WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
+
+		dbo = job->bos[i];
+		file_size += dbo->size;
+		n_bomap_pages += dbo->size >> PAGE_SHIFT;
+		n_obj++;
+	}
+
+	/* If we have any buffer objects, add a bomap object */
+	if (n_bomap_pages) {
+		file_size += n_bomap_pages * sizeof(*bomap);
+		n_obj++;
+	}
+
+	/* Add the size of the headers */
+	file_size += sizeof(*iter.hdr) * n_obj;
+
+	/* Allocate the file in vmalloc memory, it's likely to be big */
+	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+			__GFP_NORETRY);
+	if (!iter.start) {
+		dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
+		return;
+	}
+
+	/* Point the data member after the headers */
+	iter.hdr = iter.start;
+	iter.data = &iter.hdr[n_obj];
+
+	memset(iter.hdr, 0, iter.data - iter.start);
+
+	/*
+	 * For now, we write the job identifier in the register dump header,
+	 * so that we can decode the entire dump later with pandecode
+	 */
+	iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
+	iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
+	iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
+	iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
+	iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
+
+	panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
+
+	/* Reserve space for the bomap */
+	if (job->bo_count) {
+		bomap_start = bomap = iter.data;
+		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
+		panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
+					  bomap + n_bomap_pages);
+	}
+
+	for (i = 0; i < job->bo_count; i++) {
+		struct iosys_map map;
+		struct panfrost_gem_mapping *mapping;
+		struct panfrost_gem_object *bo;
+		struct sg_page_iter page_iter;
+		void *vaddr;
+
+		bo = to_panfrost_bo(job->bos[i]);
+		mapping = job->mappings[i];
+
+		if (!bo->base.sgt) {
+			dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		ret = drm_gem_shmem_vmap(&bo->base, &map);
+		if (ret) {
+			dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		WARN_ON(!mapping->active);
+
+		iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
+
+		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
+			struct page *page = sg_page_iter_page(&page_iter);
+
+			if (!IS_ERR(page))
+				*bomap++ = cpu_to_le64(page_to_phys(page));
+			else {
+				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
+				*bomap++ = ~cpu_to_le64(0);
+			}
+		}
+
+		iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
+
+		vaddr = map.vaddr;
+		memcpy(iter.data, vaddr, bo->base.base.size);
+
+		drm_gem_shmem_vunmap(&bo->base, &map);
+
+		iter.hdr->bomap.valid = cpu_to_le64(1);
+
+dump_header:	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BO, iter.data +
+					  bo->base.base.size);
+	}
+	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_TRAILER, iter.data);
+
+	dev_coredumpv(pfdev->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.h b/drivers/gpu/drm/panfrost/panfrost_dump.h
new file mode 100644
index 000000000000..7d9bcefa5346
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Collabora ltd.
+ */
+
+#ifndef PANFROST_DUMP_H
+#define PANFROST_DUMP_H
+
+struct panfrost_job;
+void panfrost_core_dump(struct panfrost_job *job);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 7c4208476fbd..dbc597ab46fb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -20,6 +20,7 @@ 
 #include "panfrost_regs.h"
 #include "panfrost_gpu.h"
 #include "panfrost_mmu.h"
+#include "panfrost_dump.h"
 
 #define JOB_TIMEOUT_MS 500
 
@@ -727,6 +728,8 @@  static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
 		job_read(pfdev, JS_TAIL_LO(js)),
 		sched_job);
 
+	panfrost_core_dump(job);
+
 	atomic_set(&pfdev->reset.pending, 1);
 	panfrost_reset(pfdev, sched_job);
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index accb4fa3adb8..1ddc6c4c5e1c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -225,24 +225,26 @@ 
 #define JOB_INT_MASK_ERR(j)		BIT((j) + 16)
 #define JOB_INT_MASK_DONE(j)		BIT(j)
 
+#define JS_SLOT_STRIDE			0x80
+
 #define JS_BASE				0x1800
-#define JS_HEAD_LO(n)			(JS_BASE + ((n) * 0x80) + 0x00)
-#define JS_HEAD_HI(n)			(JS_BASE + ((n) * 0x80) + 0x04)
-#define JS_TAIL_LO(n)			(JS_BASE + ((n) * 0x80) + 0x08)
-#define JS_TAIL_HI(n)			(JS_BASE + ((n) * 0x80) + 0x0c)
-#define JS_AFFINITY_LO(n)		(JS_BASE + ((n) * 0x80) + 0x10)
-#define JS_AFFINITY_HI(n)		(JS_BASE + ((n) * 0x80) + 0x14)
-#define JS_CONFIG(n)			(JS_BASE + ((n) * 0x80) + 0x18)
-#define JS_XAFFINITY(n)			(JS_BASE + ((n) * 0x80) + 0x1c)
-#define JS_COMMAND(n)			(JS_BASE + ((n) * 0x80) + 0x20)
-#define JS_STATUS(n)			(JS_BASE + ((n) * 0x80) + 0x24)
-#define JS_HEAD_NEXT_LO(n)		(JS_BASE + ((n) * 0x80) + 0x40)
-#define JS_HEAD_NEXT_HI(n)		(JS_BASE + ((n) * 0x80) + 0x44)
-#define JS_AFFINITY_NEXT_LO(n)		(JS_BASE + ((n) * 0x80) + 0x50)
-#define JS_AFFINITY_NEXT_HI(n)		(JS_BASE + ((n) * 0x80) + 0x54)
-#define JS_CONFIG_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x58)
-#define JS_COMMAND_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x60)
-#define JS_FLUSH_ID_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x70)
+#define JS_HEAD_LO(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x00)
+#define JS_HEAD_HI(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x04)
+#define JS_TAIL_LO(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x08)
+#define JS_TAIL_HI(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x0c)
+#define JS_AFFINITY_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x10)
+#define JS_AFFINITY_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x14)
+#define JS_CONFIG(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x18)
+#define JS_XAFFINITY(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x1c)
+#define JS_COMMAND(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x20)
+#define JS_STATUS(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x24)
+#define JS_HEAD_NEXT_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x40)
+#define JS_HEAD_NEXT_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x44)
+#define JS_AFFINITY_NEXT_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x50)
+#define JS_AFFINITY_NEXT_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x54)
+#define JS_CONFIG_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x58)
+#define JS_COMMAND_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x60)
+#define JS_FLUSH_ID_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x70)
 
 /* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
 #define JS_CONFIG_START_FLUSH_CLEAN		BIT(8)
@@ -281,7 +283,8 @@ 
 #define AS_COMMAND_FLUSH_MEM		0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
 						   flush all L2 caches then issue a flush region command to all MMUs */
 
-#define MMU_AS(as)			(0x2400 + ((as) << 6))
+#define MMU_AS_SHIFT			0x06
+#define MMU_AS(as)			(0x2400 + ((as) << MMU_AS_SHIFT))
 
 #define AS_TRANSTAB_LO(as)		(MMU_AS(as) + 0x00) /* (RW) Translation Table Base Address for address space n, low word */
 #define AS_TRANSTAB_HI(as)		(MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 9e40277d8185..eac87310b348 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -224,6 +224,53 @@  struct drm_panfrost_madvise {
 	__u32 retained;       /* out, whether backing store still exists */
 };
 
+/* Definitions for coredump decoding in user space */
+#define PANFROSTDUMP_MAJOR 1
+#define PANFROSTDUMP_MINOR 0
+
+#define PANFROSTDUMP_MAGIC 0x464E4150 /* PANF */
+
+#define PANFROSTDUMP_BUF_REG 0
+#define PANFROSTDUMP_BUF_BOMAP (PANFROSTDUMP_BUF_REG + 1)
+#define PANFROSTDUMP_BUF_BO (PANFROSTDUMP_BUF_BOMAP + 1)
+#define PANFROSTDUMP_BUF_TRAILER (PANFROSTDUMP_BUF_BO + 1)
+
+struct panfrost_dump_object_header {
+	__le32 magic;
+	__le32 type;
+	__le32 file_size;
+	__le32 file_offset;
+
+	union {
+		struct pan_reg_hdr {
+			__le64 jc;
+			__le32 gpu_id;
+			__le32 major;
+			__le32 minor;
+			__le64 nbos;
+		} reghdr;
+
+		struct pan_bomap_hdr {
+			__le32 valid;
+			__le64 iova;
+			__le32 data[2];
+		} bomap;
+
+		/*
+		 * Force same size in case we want to expand the header
+		 * with new fields and also keep it 512-byte aligned
+		 */
+
+		__le32 sizer[496];
+	};
+};
+
+/* Registers object, an array of these */
+struct panfrost_dump_registers {
+	__le32 reg;
+	__le32 value;
+};
+
 #if defined(__cplusplus)
 }
 #endif