diff mbox series

[RFC,3/3] iommu/arm-smmu-v3: Add debug interfaces for SMMUv3

Message ID 1611911184-116261-4-git-send-email-wangzhou1@hisilicon.com (mailing list archive)
State New, archived
Headers show
Series iommu/arm-smmu-v3: Add debug interfaces for SMMUv3 | expand

Commit Message

Zhou Wang Jan. 29, 2021, 9:06 a.m. UTC
This patch adds debug interfaces for SMMUv3 driver in sysfs. It adds debug
related files under /sys/kernel/debug/iommu/smmuv3.

User should firstly set device and pasid to pci_dev and pasid by:
(currently only support PCI device)
echo <domain>:<bus>:<dev>.<fun> > /sys/kernel/debug/iommu/smmuv3/pci_dev
echo <pasid> > /sys/kernel/debug/iommu/smmuv3/pasid

Then value in cd and ste can be got by:
cat /sys/kernel/debug/iommu/smmuv3/ste
cat /sys/kernel/debug/iommu/smmuv3/cd

S1 and S2 page tables can be got by:
cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s1
cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s2

For ste, cd and page table, related device and pasid are set in pci_dev and
pasid files as above.

Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
 drivers/iommu/Kconfig                       |  11 +
 drivers/iommu/arm/arm-smmu-v3/Makefile      |   1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   3 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |   8 +
 drivers/iommu/arm/arm-smmu-v3/debugfs.c     | 398 ++++++++++++++++++++++++++++
 5 files changed, 421 insertions(+)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/debugfs.c

Comments

Robin Murphy Feb. 4, 2021, 3:58 p.m. UTC | #1
On 2021-01-29 09:06, Zhou Wang wrote:
> This patch adds debug interfaces for SMMUv3 driver in sysfs. It adds debug
> related files under /sys/kernel/debug/iommu/smmuv3.
> 
> User should firstly set device and pasid to pci_dev and pasid by:
> (currently only support PCI device)
> echo <domain>:<bus>:<dev>.<fun> > /sys/kernel/debug/iommu/smmuv3/pci_dev
> echo <pasid> > /sys/kernel/debug/iommu/smmuv3/pasid
> 
> Then value in cd and ste can be got by:
> cat /sys/kernel/debug/iommu/smmuv3/ste
> cat /sys/kernel/debug/iommu/smmuv3/cd
> 
> S1 and S2 page tables can be got by:
> cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s1
> cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s2
> 
> For ste, cd and page table, related device and pasid are set in pci_dev and
> pasid files as above.
> 
> Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
> ---
>   drivers/iommu/Kconfig                       |  11 +
>   drivers/iommu/arm/arm-smmu-v3/Makefile      |   1 +
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   3 +
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |   8 +
>   drivers/iommu/arm/arm-smmu-v3/debugfs.c     | 398 ++++++++++++++++++++++++++++
>   5 files changed, 421 insertions(+)
>   create mode 100644 drivers/iommu/arm/arm-smmu-v3/debugfs.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 192ef8f..4822c88 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -325,6 +325,17 @@ config ARM_SMMU_V3_SVA
>   	  Say Y here if your system supports SVA extensions such as PCIe PASID
>   	  and PRI.
>   
> +config ARM_SMMU_V3_DEBUGFS
> +	bool "Export ARM SMMUv3 internals in Debugfs"
> +	depends on ARM_SMMU_V3 && IOMMU_DEBUGFS
> +	help
> +	  DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!
> +
> +	  Expose ARM SMMUv3 internals in Debugfs.
> +
> +	  This option is -NOT- intended for production environments, and should
> +	  only be enabled for debugging ARM SMMUv3.
> +
>   config S390_IOMMU
>   	def_bool y if S390 && PCI
>   	depends on S390 && PCI
> diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
> index 54feb1ec..55b411a 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/Makefile
> +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
> @@ -3,3 +3,4 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
>   arm_smmu_v3-objs-y += arm-smmu-v3.o
>   arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
>   arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
> +obj-$(CONFIG_ARM_SMMU_V3_DEBUGFS) += debugfs.o
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index b65f63e2..aac7fdb 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -3602,6 +3602,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>   		return ret;
>   	}
>   
> +	arm_smmu_debugfs_init();
> +
>   	return arm_smmu_set_bus_ops(&arm_smmu_ops);
>   }
>   
> @@ -3610,6 +3612,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
>   	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
>   
>   	arm_smmu_set_bus_ops(NULL);
> +	arm_smmu_debugfs_uninit();
>   	iommu_device_unregister(&smmu->iommu);
>   	iommu_device_sysfs_remove(&smmu->iommu);
>   	arm_smmu_device_disable(smmu);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 3e7af39..31c4580 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -752,4 +752,12 @@ static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
>   
>   static inline void arm_smmu_sva_notifier_synchronize(void) {}
>   #endif /* CONFIG_ARM_SMMU_V3_SVA */
> +
> +#ifdef CONFIG_ARM_SMMU_V3_DEBUGFS
> +void arm_smmu_debugfs_init(void);
> +void arm_smmu_debugfs_uninit(void);
> +#else
> +static inline void arm_smmu_debugfs_init(void) {}
> +static inline void arm_smmu_debugfs_uninit(void) {}
> +#endif /* CONFIG_ARM_SMMU_V3_DEBUGFS */
>   #endif /* _ARM_SMMU_V3_H */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/debugfs.c b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
> new file mode 100644
> index 0000000..1af219a
> --- /dev/null
> +++ b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
> @@ -0,0 +1,398 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/debugfs.h>
> +#include <linux/iommu.h>
> +#include <linux/io-pgtable.h>
> +#include <linux/pci.h>
> +#include "arm-smmu-v3.h"
> +#include "../../io-pgtable-arm.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt)	"SMMUv3 debug: " fmt
> +
> +#define NAME_BUF_LEN 32
> +
> +static struct dentry *arm_smmu_debug;
> +static char dump_pci_dev[NAME_BUF_LEN];
> +static u32 pasid;
> +static struct mutex lock;
> +
> +static ssize_t master_pdev_read(struct file *filp, char __user *buf,
> +				size_t count, loff_t *pos)
> +{
> +	char pdev_name[NAME_BUF_LEN];
> +	char name[NAME_BUF_LEN];
> +	int ret;
> +
> +	mutex_lock(&lock);
> +	strncpy(pdev_name, dump_pci_dev, NAME_BUF_LEN);
> +	mutex_unlock(&lock);
> +
> +	if (!strlen(pdev_name)) {
> +		pr_err("Please set pci_dev firstly\n");

Even if it's "just debugfs", printing userspace-triggered errors to the 
kernel log still isn't a great idea. Imagine at some point during 
developing a script you inadvertently try to read this file in a loop, 
then you want to go back and check something from early in the boot 
log... oops.

> +		return 0;

Surely you'd want to return an actual error?

> +	}
> +
> +	ret = scnprintf(name, NAME_BUF_LEN, "%s\n", pdev_name);
> +	return simple_read_from_buffer(buf, count, pos, name, ret);
> +}
> +
> +static ssize_t master_pdev_write(struct file *filp, const char __user *buf,
> +				 size_t count, loff_t *pos)
> +{
> +	char name[NAME_BUF_LEN];
> +	struct device *dev;
> +	int len;
> +
> +	if (*pos != 0)
> +		return 0;
> +
> +	if (count >= NAME_BUF_LEN)
> +		return -ENOSPC;
> +
> +	len = simple_write_to_buffer(name, NAME_BUF_LEN - 1, pos, buf, count);
> +	if (len < 0)
> +		return len;
> +	name[len] = '\0';
> +
> +	dev = bus_find_device_by_name(&pci_bus_type, NULL, name);
> +	if (!dev) {
> +		pr_err("Failed to find device\n");

As above.

> +		return -EINVAL;

Hey, at least that's right! :)

> +	}
> +
> +	mutex_lock(&lock);
> +	strncpy(dump_pci_dev, dev_name(dev), NAME_BUF_LEN);
> +	mutex_unlock(&lock);
> +
> +	put_device(dev);
> +
> +	return count;
> +}
> +
> +static const struct file_operations master_pdev_fops = {
> +	.owner = THIS_MODULE,
> +	.open = simple_open,
> +	.read = master_pdev_read,
> +	.write = master_pdev_write,
> +};
> +
> +static struct arm_smmu_master *arm_smmu_get_master(struct device *dev)
> +{
> +	struct arm_smmu_master *master;
> +
> +	if (!dev->iommu) {
> +		pr_err("master device driver may not be loaded!\n");
> +		return NULL;
> +	}
> +
> +	master = dev_iommu_priv_get(dev);
> +	if (!master) {
> +		pr_err("Failed to find master dev\n");
> +		return NULL;
> +	}
> +
> +	return master;
> +}
> +
> +static void ste_dump(struct seq_file *m, struct device *dev, __le64 *ste)
> +{
> +	int i;
> +
> +	seq_printf(m, "SMMUv3 STE values for device: %s\n", dev_name(dev));
> +	for (i = 0; i < STRTAB_STE_DWORDS; i++) {
> +		seq_printf(m, "0x%016llx\n", *ste);

It would take about half a dozen lines of userspace code to open 
/dev/mem, read SMMU_STRTAB_BASE and hexdump what it points to (or script 
the equivalent in an external debug environment). This seems a little 
low on value :/

> +		ste++;
> +	}
> +}
> +
> +static int ste_show(struct seq_file *m, void *unused)
> +{
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_device *smmu;
> +	struct device *dev;
> +	__le64 *ste;
> +
> +	mutex_lock(&lock);
> +
> +	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
> +	if (!dev) {
> +		mutex_unlock(&lock);
> +		pr_err("Failed to find device\n");
> +		return -EINVAL;
> +	}
> +
> +	master = arm_smmu_get_master(dev);
> +	if (!master) {
> +		put_device(dev);
> +		mutex_unlock(&lock);
> +		return -ENODEV;
> +	}
> +	smmu = master->smmu;
> +
> +	/* currently only support one master one sid */
> +	ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
> +	ste_dump(m, dev, ste);

This interface clearly won't scale well, but even as-is it doesn't 
completely work. A PCI/PCIe device may well alias to multiple Stream 
IDs, and not use the first one to perform the DMA operations you're 
trying to debug (Marvell SATA controllers are such fun...)

If you need to inspect the Stream Table for an SMMU instance, I'd 
suggest simply dumping the Stream Table for that SMMU instance. Maybe 
filter it by Stream ID if you really want to, but trying to be cute and 
abstract away the notion of Stream IDs entirely means significant 
ongoing complexity for very little gain. By your own admission this is 
low-level debugging for people who already understand what they're 
doing. Getting the information out is important; having a 
consumer-product level of user-friendliness is not. Maintaining 
filtering logic in kernel code is always going to be orders of magnitude 
more work than pasting a complete dump into a text editor and trimming it.

> +
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +
> +	return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(ste);
> +
> +static void cd_dump(struct seq_file *m, struct device *dev, __le64 *cd)
> +{
> +	int i;
> +
> +	seq_printf(m, "SMMUv3 CD values for device: %s, ssid: 0x%x\n",
> +		   dev_name(dev), pasid);
> +	for (i = 0; i < CTXDESC_CD_DWORDS; i++) {
> +		seq_printf(m, "0x%016llx\n", *cd);

Once more this is basically just trivial pointer chasing. At this point 
I'm now entertaining the idea of little bit of a Python which wraps 
/dev/mem in an interface that looks like Arm Development Studio's "read 
physical memory" commands, so the same decoding script could work in 
both environments...

> +		cd++;
> +	}
> +}
> +
> +static int cd_show(struct seq_file *m, void *unused)
> +{
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_domain *domain;
> +	struct device *dev;
> +	__le64 *cd;
> +	int ret;
> +
> +	mutex_lock(&lock);
> +
> +	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
> +	if (!dev) {
> +		mutex_unlock(&lock);
> +		pr_err("Failed to find device\n");
> +		return -EINVAL;
> +	}
> +
> +	master = arm_smmu_get_master(dev);
> +	if (!master) {
> +		ret = -ENODEV;
> +		goto err_out;
> +	}
> +	domain = master->domain;
> +
> +	cd = arm_smmu_get_cd_ptr(domain, pasid);
> +	if (!cd) {
> +		ret = -EINVAL;
> +		pr_err("Failed to find cd(ssid: %u)\n", pasid);
> +		goto err_out;
> +	}
> +	cd_dump(m, dev, cd);
> +
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +
> +	return 0;
> +
> +err_out:
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +	return ret;
> +}
> +DEFINE_SHOW_ATTRIBUTE(cd);
> +
> +static void __ptdump(arm_lpae_iopte *ptep, int lvl, u64 va,
> +		     struct arm_lpae_io_pgtable *data, struct seq_file *m)
> +{
> +	arm_lpae_iopte pte, *ptep_next;
> +	u64 i, tmp_va = 0;
> +	int entry_num;
> +
> +	entry_num = 1 << (data->bits_per_level + ARM_LPAE_PGD_IDX(lvl, data));
> +
> +	for (i = 0; i < entry_num; i++) {
> +		pte = READ_ONCE(*(ptep + i));
> +		if (!pte)
> +			continue;
> +
> +		tmp_va = va | (i << ARM_LPAE_LVL_SHIFT(lvl, data));
> +
> +		if (iopte_leaf(pte, lvl, data->iop.fmt)) {
> +			/* To do: print prot */
> +			seq_printf(m, "va: %llx -> pa: %llx\n", tmp_va,
> +				   iopte_to_paddr(pte, data));
> +			continue;
> +		}
> +
> +		ptep_next = iopte_deref(pte, data);
> +		__ptdump(ptep_next, lvl + 1, tmp_va, data, m);
> +	}
> +}
> +
> +static void ptdump(struct seq_file *m, struct arm_smmu_domain *domain,
> +		   void *pgd, int stage)
> +{
> +	struct arm_lpae_io_pgtable *data, data_sva;
> +	int levels, va_bits, bits_per_level;
> +	struct io_pgtable_ops *ops;
> +	arm_lpae_iopte *ptep = pgd;
> +
> +	if (stage == 1 && !pasid) {
> +		ops = domain->pgtbl_ops;
> +		data = io_pgtable_ops_to_data(ops);
> +	} else {
> +		va_bits = VA_BITS - PAGE_SHIFT;
> +		bits_per_level = PAGE_SHIFT - ilog2(sizeof(arm_lpae_iopte));
> +		levels = DIV_ROUND_UP(va_bits, bits_per_level);
> +
> +		data_sva.start_level = ARM_LPAE_MAX_LEVELS - levels;
> +		data_sva.pgd_bits = va_bits - (bits_per_level * (levels - 1));
> +		data_sva.bits_per_level = bits_per_level;
> +		data_sva.pgd = pgd;
> +
> +		data = &data_sva;
> +	}
> +
> +	__ptdump(ptep, data->start_level, 0, data, m);
> +}

As Will suggested, I agree that code for walking and dumping pagetables 
themselves deserves to be common to io-pgtable. For "proper" SVA we can 
already dump the CPU pagetables.

Robin.

> +static int pt_dump_s1_show(struct seq_file *m, void *unused)
> +{
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_domain *domain;
> +	struct device *dev;
> +	__le64 *cd;
> +	void *pgd;
> +	u64 ttbr;
> +	int ret;
> +
> +	mutex_lock(&lock);
> +
> +	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
> +	if (!dev) {
> +		mutex_unlock(&lock);
> +		pr_err("Failed to find device\n");
> +		return -EINVAL;
> +	}
> +
> +	master = arm_smmu_get_master(dev);
> +	if (!master) {
> +		ret = -ENODEV;
> +		goto err_out;
> +	}
> +	domain = master->domain;
> +
> +	cd = arm_smmu_get_cd_ptr(domain, pasid);
> +	if (!cd || !(le64_to_cpu(cd[0]) & CTXDESC_CD_0_V)) {
> +		ret = -EINVAL;
> +		pr_err("Failed to find valid cd(ssid: %u)\n", pasid);
> +		goto err_out;
> +	}
> +
> +	/* CD0 and other CDx are all using ttbr0 */
> +	ttbr = le64_to_cpu(cd[1]) & CTXDESC_CD_1_TTB0_MASK;
> +	pgd = phys_to_virt(ttbr);
> +
> +	if (ttbr) {
> +		seq_printf(m, "SMMUv3 dump page table for device %s, stage 1, ssid 0x%x:\n",
> +			   dev_name(dev), pasid);
> +		ptdump(m, domain, pgd, 1);
> +	}
> +
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +
> +	return 0;
> +
> +err_out:
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +	return ret;
> +}
> +DEFINE_SHOW_ATTRIBUTE(pt_dump_s1);
> +
> +static int pt_dump_s2_show(struct seq_file *m, void *unused)
> +{
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_device *smmu;
> +	struct device *dev;
> +	__le64 *ste;
> +	u64 vttbr;
> +	void *pgd;
> +	int ret;
> +
> +	mutex_lock(&lock);
> +
> +	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
> +	if (!dev) {
> +		mutex_unlock(&lock);
> +		pr_err("Failed to find device\n");
> +		return -EINVAL;
> +	}
> +
> +	master = arm_smmu_get_master(dev);
> +	if (!master) {
> +		ret = -ENODEV;
> +		goto err_out;
> +	}
> +	smmu = master->smmu;
> +
> +	/* currently only support one master one sid */
> +	ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
> +	if (!(le64_to_cpu(ste[0]) & (1UL << 2))) {
> +		ret = -EINVAL;
> +		pr_err("Stage 2 translation is not valid\n");
> +		goto err_out;
> +	}
> +
> +	vttbr = le64_to_cpu(ste[3]) & STRTAB_STE_3_S2TTB_MASK;
> +	pgd = phys_to_virt(vttbr);
> +
> +	if (vttbr) {
> +		seq_printf(m, "SMMUv3 dump page table for device %s, stage 2:\n",
> +			   dev_name(dev));
> +		ptdump(m, 0, pgd, 2);
> +	}
> +
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +
> +	return 0;
> +
> +err_out:
> +	put_device(dev);
> +	mutex_unlock(&lock);
> +	return ret;
> +}
> +DEFINE_SHOW_ATTRIBUTE(pt_dump_s2);
> +
> +void arm_smmu_debugfs_init(void)
> +{
> +	mutex_init(&lock);
> +
> +	arm_smmu_debug = debugfs_create_dir("smmuv3", iommu_debugfs_dir);
> +
> +	debugfs_create_file("pci_dev", 0644, arm_smmu_debug, NULL,
> +			    &master_pdev_fops);
> +	/*
> +	 * Problem here is we need to know dump which cd, currently my idea
> +	 * is we can get related pasid by smmu_bond_get trace point or by
> +	 * debug interface of master device specific driver, then here we
> +	 * use pasid to dump related cd.
> +	 *
> +	 * Or there is no need to dump page table about s1(pasid != 0) and s2
> +	 * as they can be got by /proc/<pid>/pagemap.
> +	 */
> +	debugfs_create_u32("pasid", 0644, arm_smmu_debug, &pasid);
> +
> +	debugfs_create_file("ste", 0444, arm_smmu_debug, NULL, &ste_fops);
> +
> +	debugfs_create_file("cd", 0444, arm_smmu_debug, NULL, &cd_fops);
> +
> +	debugfs_create_file("pt_dump_s1", 0444, arm_smmu_debug, NULL,
> +			    &pt_dump_s1_fops);
> +
> +	debugfs_create_file("pt_dump_s2", 0444, arm_smmu_debug, NULL,
> +			    &pt_dump_s2_fops);
> +}
> +
> +void arm_smmu_debugfs_uninit(void)
> +{
> +	debugfs_remove_recursive(arm_smmu_debug);
> +	mutex_destroy(&lock);
> +}
>
Zhou Wang Feb. 5, 2021, 12:04 p.m. UTC | #2
On 2021/2/4 23:58, Robin Murphy wrote:
> On 2021-01-29 09:06, Zhou Wang wrote:
>> This patch adds debug interfaces for SMMUv3 driver in sysfs. It adds debug
>> related files under /sys/kernel/debug/iommu/smmuv3.
>>
>> User should firstly set device and pasid to pci_dev and pasid by:
>> (currently only support PCI device)
>> echo <domain>:<bus>:<dev>.<fun> > /sys/kernel/debug/iommu/smmuv3/pci_dev
>> echo <pasid> > /sys/kernel/debug/iommu/smmuv3/pasid
>>
>> Then value in cd and ste can be got by:
>> cat /sys/kernel/debug/iommu/smmuv3/ste
>> cat /sys/kernel/debug/iommu/smmuv3/cd
>>
>> S1 and S2 page tables can be got by:
>> cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s1
>> cat /sys/kernel/debug/iommu/smmuv3/pt_dump_s2
>>
>> For ste, cd and page table, related device and pasid are set in pci_dev and
>> pasid files as above.
>>
>> Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
>> ---
>>   drivers/iommu/Kconfig                       |  11 +
>>   drivers/iommu/arm/arm-smmu-v3/Makefile      |   1 +
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   3 +
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |   8 +
>>   drivers/iommu/arm/arm-smmu-v3/debugfs.c     | 398 ++++++++++++++++++++++++++++
>>   5 files changed, 421 insertions(+)
>>   create mode 100644 drivers/iommu/arm/arm-smmu-v3/debugfs.c
>>
>> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
>> index 192ef8f..4822c88 100644
>> --- a/drivers/iommu/Kconfig
>> +++ b/drivers/iommu/Kconfig
>> @@ -325,6 +325,17 @@ config ARM_SMMU_V3_SVA
>>         Say Y here if your system supports SVA extensions such as PCIe PASID
>>         and PRI.
>>   +config ARM_SMMU_V3_DEBUGFS
>> +    bool "Export ARM SMMUv3 internals in Debugfs"
>> +    depends on ARM_SMMU_V3 && IOMMU_DEBUGFS
>> +    help
>> +      DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!
>> +
>> +      Expose ARM SMMUv3 internals in Debugfs.
>> +
>> +      This option is -NOT- intended for production environments, and should
>> +      only be enabled for debugging ARM SMMUv3.
>> +
>>   config S390_IOMMU
>>       def_bool y if S390 && PCI
>>       depends on S390 && PCI
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
>> index 54feb1ec..55b411a 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/Makefile
>> +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
>> @@ -3,3 +3,4 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
>>   arm_smmu_v3-objs-y += arm-smmu-v3.o
>>   arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
>>   arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
>> +obj-$(CONFIG_ARM_SMMU_V3_DEBUGFS) += debugfs.o
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index b65f63e2..aac7fdb 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -3602,6 +3602,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>>           return ret;
>>       }
>>   +    arm_smmu_debugfs_init();
>> +
>>       return arm_smmu_set_bus_ops(&arm_smmu_ops);
>>   }
>>   @@ -3610,6 +3612,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
>>       struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
>>         arm_smmu_set_bus_ops(NULL);
>> +    arm_smmu_debugfs_uninit();
>>       iommu_device_unregister(&smmu->iommu);
>>       iommu_device_sysfs_remove(&smmu->iommu);
>>       arm_smmu_device_disable(smmu);
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> index 3e7af39..31c4580 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> @@ -752,4 +752,12 @@ static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
>>     static inline void arm_smmu_sva_notifier_synchronize(void) {}
>>   #endif /* CONFIG_ARM_SMMU_V3_SVA */
>> +
>> +#ifdef CONFIG_ARM_SMMU_V3_DEBUGFS
>> +void arm_smmu_debugfs_init(void);
>> +void arm_smmu_debugfs_uninit(void);
>> +#else
>> +static inline void arm_smmu_debugfs_init(void) {}
>> +static inline void arm_smmu_debugfs_uninit(void) {}
>> +#endif /* CONFIG_ARM_SMMU_V3_DEBUGFS */
>>   #endif /* _ARM_SMMU_V3_H */
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/debugfs.c b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
>> new file mode 100644
>> index 0000000..1af219a
>> --- /dev/null
>> +++ b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
>> @@ -0,0 +1,398 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/debugfs.h>
>> +#include <linux/iommu.h>
>> +#include <linux/io-pgtable.h>
>> +#include <linux/pci.h>
>> +#include "arm-smmu-v3.h"
>> +#include "../../io-pgtable-arm.h"
>> +
>> +#undef pr_fmt
>> +#define pr_fmt(fmt)    "SMMUv3 debug: " fmt
>> +
>> +#define NAME_BUF_LEN 32
>> +
>> +static struct dentry *arm_smmu_debug;
>> +static char dump_pci_dev[NAME_BUF_LEN];
>> +static u32 pasid;
>> +static struct mutex lock;
>> +
>> +static ssize_t master_pdev_read(struct file *filp, char __user *buf,
>> +                size_t count, loff_t *pos)
>> +{
>> +    char pdev_name[NAME_BUF_LEN];
>> +    char name[NAME_BUF_LEN];
>> +    int ret;
>> +
>> +    mutex_lock(&lock);
>> +    strncpy(pdev_name, dump_pci_dev, NAME_BUF_LEN);
>> +    mutex_unlock(&lock);
>> +
>> +    if (!strlen(pdev_name)) {
>> +        pr_err("Please set pci_dev firstly\n");
> 
> Even if it's "just debugfs", printing userspace-triggered errors to the kernel log
> still isn't a great idea. Imagine at some point during developing a script you
> inadvertently try to read this file in a loop, then you want to go back and check
> something from early in the boot log... oops.

Yes, you are right. Trigger kernel log from user space is bad.
Let's change to use seq_printf in next version.

> 
>> +        return 0;
> 
> Surely you'd want to return an actual error?

My mistake here. Should return -EINVAL.

> 
>> +    }
>> +
>> +    ret = scnprintf(name, NAME_BUF_LEN, "%s\n", pdev_name);
>> +    return simple_read_from_buffer(buf, count, pos, name, ret);
>> +}
>> +
>> +static ssize_t master_pdev_write(struct file *filp, const char __user *buf,
>> +                 size_t count, loff_t *pos)
>> +{
>> +    char name[NAME_BUF_LEN];
>> +    struct device *dev;
>> +    int len;
>> +
>> +    if (*pos != 0)
>> +        return 0;
>> +
>> +    if (count >= NAME_BUF_LEN)
>> +        return -ENOSPC;
>> +
>> +    len = simple_write_to_buffer(name, NAME_BUF_LEN - 1, pos, buf, count);
>> +    if (len < 0)
>> +        return len;
>> +    name[len] = '\0';
>> +
>> +    dev = bus_find_device_by_name(&pci_bus_type, NULL, name);
>> +    if (!dev) {
>> +        pr_err("Failed to find device\n");
> 
> As above.

OK.

> 
>> +        return -EINVAL;
> 
> Hey, at least that's right! :)
> 
>> +    }
>> +
>> +    mutex_lock(&lock);
>> +    strncpy(dump_pci_dev, dev_name(dev), NAME_BUF_LEN);
>> +    mutex_unlock(&lock);
>> +
>> +    put_device(dev);
>> +
>> +    return count;
>> +}
>> +
>> +static const struct file_operations master_pdev_fops = {
>> +    .owner = THIS_MODULE,
>> +    .open = simple_open,
>> +    .read = master_pdev_read,
>> +    .write = master_pdev_write,
>> +};
>> +
>> +static struct arm_smmu_master *arm_smmu_get_master(struct device *dev)
>> +{
>> +    struct arm_smmu_master *master;
>> +
>> +    if (!dev->iommu) {
>> +        pr_err("master device driver may not be loaded!\n");
>> +        return NULL;
>> +    }
>> +
>> +    master = dev_iommu_priv_get(dev);
>> +    if (!master) {
>> +        pr_err("Failed to find master dev\n");
>> +        return NULL;
>> +    }
>> +
>> +    return master;
>> +}
>> +
>> +static void ste_dump(struct seq_file *m, struct device *dev, __le64 *ste)
>> +{
>> +    int i;
>> +
>> +    seq_printf(m, "SMMUv3 STE values for device: %s\n", dev_name(dev));
>> +    for (i = 0; i < STRTAB_STE_DWORDS; i++) {
>> +        seq_printf(m, "0x%016llx\n", *ste);
> 
> It would take about half a dozen lines of userspace code to open /dev/mem,
> read SMMU_STRTAB_BASE and hexdump what it points to (or script the equivalent
> in an external debug environment). This seems a little low on value :/

It can be done by a user space tool, indeed currently we just use a such
tool to debug. However, I think it is better that there is a common way to help
to debug in kernel. For a kernel with debug config open, information can be got
directly for sysfs, no need to put an additional user space tool in system, and
developers could share these debug interfaces.

> 
>> +        ste++;
>> +    }
>> +}
>> +
>> +static int ste_show(struct seq_file *m, void *unused)
>> +{
>> +    struct arm_smmu_master *master;
>> +    struct arm_smmu_device *smmu;
>> +    struct device *dev;
>> +    __le64 *ste;
>> +
>> +    mutex_lock(&lock);
>> +
>> +    dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
>> +    if (!dev) {
>> +        mutex_unlock(&lock);
>> +        pr_err("Failed to find device\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    master = arm_smmu_get_master(dev);
>> +    if (!master) {
>> +        put_device(dev);
>> +        mutex_unlock(&lock);
>> +        return -ENODEV;
>> +    }
>> +    smmu = master->smmu;
>> +
>> +    /* currently only support one master one sid */
>> +    ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
>> +    ste_dump(m, dev, ste);
> 
> This interface clearly won't scale well, but even as-is it doesn't completely work.
> A PCI/PCIe device may well alias to multiple Stream IDs, and not use the first one

Yes, I noticed this and put a comment here :)

> to perform the DMA operations you're trying to debug
> (Marvell SATA controllers are such fun...)

That is interesting. Which driver is for this controllers?

> 
> If you need to inspect the Stream Table for an SMMU instance, I'd suggest simply
> dumping the Stream Table for that SMMU instance. Maybe filter it by Stream ID if
> you really want to, but trying to be cute and abstract away the notion of Stream
> IDs entirely means significant ongoing complexity for very little gain. By your own
> admission this is low-level debugging for people who already understand what they're
> doing. Getting the information out is important; having a consumer-product level of
> user-friendliness is not. Maintaining filtering logic in kernel code is always going
> to be orders of magnitude more work than pasting a complete dump into a text editor
> and trimming it.
>

I got your idea. There is problem that is the amount of data for CD/STE and page table
is different. Date of page table can be huge for SVA case or enable multiple PCIe VFs,
so in this series, we set device/pasid firstly, then get related information which are
STE/CD/page-table.

Do you think better to change as below?

/sys/kernel/debug/iommu/smmuv3
 -- smmu.xx
   - STEs  /* dump stream table under this smmu instance */
   - CDs   /* dump all CDs under this smmu instance */
   - pt_dump_s1 /* dump all page tables under this smmu instance */
 -- smmu.yy
   - STEs
   - CDs
   - pt_dump_s1
 ...

>> +
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +
>> +    return 0;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(ste);
>> +
>> +static void cd_dump(struct seq_file *m, struct device *dev, __le64 *cd)
>> +{
>> +    int i;
>> +
>> +    seq_printf(m, "SMMUv3 CD values for device: %s, ssid: 0x%x\n",
>> +           dev_name(dev), pasid);
>> +    for (i = 0; i < CTXDESC_CD_DWORDS; i++) {
>> +        seq_printf(m, "0x%016llx\n", *cd);
> 
> Once more this is basically just trivial pointer chasing. At this point
> I'm now entertaining the idea of little bit of a Python which wraps /dev/mem
> in an interface that looks like Arm Development Studio's "read physical memory"
> commands, so the same decoding script could work in both environments...

No matter where to do this debug interfaces(kernel or user space), my basic idea
is to put all related interfaces in one place. This series just copies the ideas
of other iommu drivers, it does this in kernel. Besides benefits mentioned above,
in kernel, we could reuse codes in smmu-v3 and io-pgtable driver :)

> 
>> +        cd++;
>> +    }
>> +}
>> +
>> +static int cd_show(struct seq_file *m, void *unused)
>> +{
>> +    struct arm_smmu_master *master;
>> +    struct arm_smmu_domain *domain;
>> +    struct device *dev;
>> +    __le64 *cd;
>> +    int ret;
>> +
>> +    mutex_lock(&lock);
>> +
>> +    dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
>> +    if (!dev) {
>> +        mutex_unlock(&lock);
>> +        pr_err("Failed to find device\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    master = arm_smmu_get_master(dev);
>> +    if (!master) {
>> +        ret = -ENODEV;
>> +        goto err_out;
>> +    }
>> +    domain = master->domain;
>> +
>> +    cd = arm_smmu_get_cd_ptr(domain, pasid);
>> +    if (!cd) {
>> +        ret = -EINVAL;
>> +        pr_err("Failed to find cd(ssid: %u)\n", pasid);
>> +        goto err_out;
>> +    }
>> +    cd_dump(m, dev, cd);
>> +
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +
>> +    return 0;
>> +
>> +err_out:
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +    return ret;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(cd);
>> +
>> +static void __ptdump(arm_lpae_iopte *ptep, int lvl, u64 va,
>> +             struct arm_lpae_io_pgtable *data, struct seq_file *m)
>> +{
>> +    arm_lpae_iopte pte, *ptep_next;
>> +    u64 i, tmp_va = 0;
>> +    int entry_num;
>> +
>> +    entry_num = 1 << (data->bits_per_level + ARM_LPAE_PGD_IDX(lvl, data));
>> +
>> +    for (i = 0; i < entry_num; i++) {
>> +        pte = READ_ONCE(*(ptep + i));
>> +        if (!pte)
>> +            continue;
>> +
>> +        tmp_va = va | (i << ARM_LPAE_LVL_SHIFT(lvl, data));
>> +
>> +        if (iopte_leaf(pte, lvl, data->iop.fmt)) {
>> +            /* To do: print prot */
>> +            seq_printf(m, "va: %llx -> pa: %llx\n", tmp_va,
>> +                   iopte_to_paddr(pte, data));
>> +            continue;
>> +        }
>> +
>> +        ptep_next = iopte_deref(pte, data);
>> +        __ptdump(ptep_next, lvl + 1, tmp_va, data, m);
>> +    }
>> +}
>> +
>> +static void ptdump(struct seq_file *m, struct arm_smmu_domain *domain,
>> +           void *pgd, int stage)
>> +{
>> +    struct arm_lpae_io_pgtable *data, data_sva;
>> +    int levels, va_bits, bits_per_level;
>> +    struct io_pgtable_ops *ops;
>> +    arm_lpae_iopte *ptep = pgd;
>> +
>> +    if (stage == 1 && !pasid) {
>> +        ops = domain->pgtbl_ops;
>> +        data = io_pgtable_ops_to_data(ops);
>> +    } else {
>> +        va_bits = VA_BITS - PAGE_SHIFT;
>> +        bits_per_level = PAGE_SHIFT - ilog2(sizeof(arm_lpae_iopte));
>> +        levels = DIV_ROUND_UP(va_bits, bits_per_level);
>> +
>> +        data_sva.start_level = ARM_LPAE_MAX_LEVELS - levels;
>> +        data_sva.pgd_bits = va_bits - (bits_per_level * (levels - 1));
>> +        data_sva.bits_per_level = bits_per_level;
>> +        data_sva.pgd = pgd;
>> +
>> +        data = &data_sva;
>> +    }
>> +
>> +    __ptdump(ptep, data->start_level, 0, data, m);
>> +}
> 
> As Will suggested, I agree that code for walking and dumping pagetables
> themselves deserves to be common to io-pgtable. For "proper" SVA we can
> already dump the CPU pagetables.

Sorry, I am a little confused here. Do you mean we put the dump page table
codes in io-pgtable, and use exported dump page tables interface in this
debugfs.c?

Yes, for SVA, we can use /proc/<pid>/pagemap to dump page tables.

Best and thanks,
Zhou

> 
> Robin.
> 
>> +static int pt_dump_s1_show(struct seq_file *m, void *unused)
>> +{
>> +    struct arm_smmu_master *master;
>> +    struct arm_smmu_domain *domain;
>> +    struct device *dev;
>> +    __le64 *cd;
>> +    void *pgd;
>> +    u64 ttbr;
>> +    int ret;
>> +
>> +    mutex_lock(&lock);
>> +
>> +    dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
>> +    if (!dev) {
>> +        mutex_unlock(&lock);
>> +        pr_err("Failed to find device\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    master = arm_smmu_get_master(dev);
>> +    if (!master) {
>> +        ret = -ENODEV;
>> +        goto err_out;
>> +    }
>> +    domain = master->domain;
>> +
>> +    cd = arm_smmu_get_cd_ptr(domain, pasid);
>> +    if (!cd || !(le64_to_cpu(cd[0]) & CTXDESC_CD_0_V)) {
>> +        ret = -EINVAL;
>> +        pr_err("Failed to find valid cd(ssid: %u)\n", pasid);
>> +        goto err_out;
>> +    }
>> +
>> +    /* CD0 and other CDx are all using ttbr0 */
>> +    ttbr = le64_to_cpu(cd[1]) & CTXDESC_CD_1_TTB0_MASK;
>> +    pgd = phys_to_virt(ttbr);
>> +
>> +    if (ttbr) {
>> +        seq_printf(m, "SMMUv3 dump page table for device %s, stage 1, ssid 0x%x:\n",
>> +               dev_name(dev), pasid);
>> +        ptdump(m, domain, pgd, 1);
>> +    }
>> +
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +
>> +    return 0;
>> +
>> +err_out:
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +    return ret;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(pt_dump_s1);
>> +
>> +static int pt_dump_s2_show(struct seq_file *m, void *unused)
>> +{
>> +    struct arm_smmu_master *master;
>> +    struct arm_smmu_device *smmu;
>> +    struct device *dev;
>> +    __le64 *ste;
>> +    u64 vttbr;
>> +    void *pgd;
>> +    int ret;
>> +
>> +    mutex_lock(&lock);
>> +
>> +    dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
>> +    if (!dev) {
>> +        mutex_unlock(&lock);
>> +        pr_err("Failed to find device\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    master = arm_smmu_get_master(dev);
>> +    if (!master) {
>> +        ret = -ENODEV;
>> +        goto err_out;
>> +    }
>> +    smmu = master->smmu;
>> +
>> +    /* currently only support one master one sid */
>> +    ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
>> +    if (!(le64_to_cpu(ste[0]) & (1UL << 2))) {
>> +        ret = -EINVAL;
>> +        pr_err("Stage 2 translation is not valid\n");
>> +        goto err_out;
>> +    }
>> +
>> +    vttbr = le64_to_cpu(ste[3]) & STRTAB_STE_3_S2TTB_MASK;
>> +    pgd = phys_to_virt(vttbr);
>> +
>> +    if (vttbr) {
>> +        seq_printf(m, "SMMUv3 dump page table for device %s, stage 2:\n",
>> +               dev_name(dev));
>> +        ptdump(m, 0, pgd, 2);
>> +    }
>> +
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +
>> +    return 0;
>> +
>> +err_out:
>> +    put_device(dev);
>> +    mutex_unlock(&lock);
>> +    return ret;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(pt_dump_s2);
>> +
>> +void arm_smmu_debugfs_init(void)
>> +{
>> +    mutex_init(&lock);
>> +
>> +    arm_smmu_debug = debugfs_create_dir("smmuv3", iommu_debugfs_dir);
>> +
>> +    debugfs_create_file("pci_dev", 0644, arm_smmu_debug, NULL,
>> +                &master_pdev_fops);
>> +    /*
>> +     * Problem here is we need to know dump which cd, currently my idea
>> +     * is we can get related pasid by smmu_bond_get trace point or by
>> +     * debug interface of master device specific driver, then here we
>> +     * use pasid to dump related cd.
>> +     *
>> +     * Or there is no need to dump page table about s1(pasid != 0) and s2
>> +     * as they can be got by /proc/<pid>/pagemap.
>> +     */
>> +    debugfs_create_u32("pasid", 0644, arm_smmu_debug, &pasid);
>> +
>> +    debugfs_create_file("ste", 0444, arm_smmu_debug, NULL, &ste_fops);
>> +
>> +    debugfs_create_file("cd", 0444, arm_smmu_debug, NULL, &cd_fops);
>> +
>> +    debugfs_create_file("pt_dump_s1", 0444, arm_smmu_debug, NULL,
>> +                &pt_dump_s1_fops);
>> +
>> +    debugfs_create_file("pt_dump_s2", 0444, arm_smmu_debug, NULL,
>> +                &pt_dump_s2_fops);
>> +}
>> +
>> +void arm_smmu_debugfs_uninit(void)
>> +{
>> +    debugfs_remove_recursive(arm_smmu_debug);
>> +    mutex_destroy(&lock);
>> +}
>>
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 192ef8f..4822c88 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -325,6 +325,17 @@  config ARM_SMMU_V3_SVA
 	  Say Y here if your system supports SVA extensions such as PCIe PASID
 	  and PRI.
 
+config ARM_SMMU_V3_DEBUGFS
+	bool "Export ARM SMMUv3 internals in Debugfs"
+	depends on ARM_SMMU_V3 && IOMMU_DEBUGFS
+	help
+	  DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!
+
+	  Expose ARM SMMUv3 internals in Debugfs.
+
+	  This option is -NOT- intended for production environments, and should
+	  only be enabled for debugging ARM SMMUv3.
+
 config S390_IOMMU
 	def_bool y if S390 && PCI
 	depends on S390 && PCI
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 54feb1ec..55b411a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -3,3 +3,4 @@  obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
 arm_smmu_v3-objs-y += arm-smmu-v3.o
 arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
 arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
+obj-$(CONFIG_ARM_SMMU_V3_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b65f63e2..aac7fdb 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3602,6 +3602,8 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	arm_smmu_debugfs_init();
+
 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
 }
 
@@ -3610,6 +3612,7 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
 	arm_smmu_set_bus_ops(NULL);
+	arm_smmu_debugfs_uninit();
 	iommu_device_unregister(&smmu->iommu);
 	iommu_device_sysfs_remove(&smmu->iommu);
 	arm_smmu_device_disable(smmu);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 3e7af39..31c4580 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -752,4 +752,12 @@  static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
 
 static inline void arm_smmu_sva_notifier_synchronize(void) {}
 #endif /* CONFIG_ARM_SMMU_V3_SVA */
+
+#ifdef CONFIG_ARM_SMMU_V3_DEBUGFS
+void arm_smmu_debugfs_init(void);
+void arm_smmu_debugfs_uninit(void);
+#else
+static inline void arm_smmu_debugfs_init(void) {}
+static inline void arm_smmu_debugfs_uninit(void) {}
+#endif /* CONFIG_ARM_SMMU_V3_DEBUGFS */
 #endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/debugfs.c b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
new file mode 100644
index 0000000..1af219a
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/debugfs.c
@@ -0,0 +1,398 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/iommu.h>
+#include <linux/io-pgtable.h>
+#include <linux/pci.h>
+#include "arm-smmu-v3.h"
+#include "../../io-pgtable-arm.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"SMMUv3 debug: " fmt
+
+#define NAME_BUF_LEN 32
+
+static struct dentry *arm_smmu_debug;
+static char dump_pci_dev[NAME_BUF_LEN];
+static u32 pasid;
+static struct mutex lock;
+
+static ssize_t master_pdev_read(struct file *filp, char __user *buf,
+				size_t count, loff_t *pos)
+{
+	char pdev_name[NAME_BUF_LEN];
+	char name[NAME_BUF_LEN];
+	int ret;
+
+	mutex_lock(&lock);
+	strncpy(pdev_name, dump_pci_dev, NAME_BUF_LEN);
+	mutex_unlock(&lock);
+
+	if (!strlen(pdev_name)) {
+		pr_err("Please set pci_dev firstly\n");
+		return 0;
+	}
+
+	ret = scnprintf(name, NAME_BUF_LEN, "%s\n", pdev_name);
+	return simple_read_from_buffer(buf, count, pos, name, ret);
+}
+
+static ssize_t master_pdev_write(struct file *filp, const char __user *buf,
+				 size_t count, loff_t *pos)
+{
+	char name[NAME_BUF_LEN];
+	struct device *dev;
+	int len;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= NAME_BUF_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(name, NAME_BUF_LEN - 1, pos, buf, count);
+	if (len < 0)
+		return len;
+	name[len] = '\0';
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, name);
+	if (!dev) {
+		pr_err("Failed to find device\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&lock);
+	strncpy(dump_pci_dev, dev_name(dev), NAME_BUF_LEN);
+	mutex_unlock(&lock);
+
+	put_device(dev);
+
+	return count;
+}
+
+static const struct file_operations master_pdev_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = master_pdev_read,
+	.write = master_pdev_write,
+};
+
+static struct arm_smmu_master *arm_smmu_get_master(struct device *dev)
+{
+	struct arm_smmu_master *master;
+
+	if (!dev->iommu) {
+		pr_err("master device driver may not be loaded!\n");
+		return NULL;
+	}
+
+	master = dev_iommu_priv_get(dev);
+	if (!master) {
+		pr_err("Failed to find master dev\n");
+		return NULL;
+	}
+
+	return master;
+}
+
+static void ste_dump(struct seq_file *m, struct device *dev, __le64 *ste)
+{
+	int i;
+
+	seq_printf(m, "SMMUv3 STE values for device: %s\n", dev_name(dev));
+	for (i = 0; i < STRTAB_STE_DWORDS; i++) {
+		seq_printf(m, "0x%016llx\n", *ste);
+		ste++;
+	}
+}
+
+static int ste_show(struct seq_file *m, void *unused)
+{
+	struct arm_smmu_master *master;
+	struct arm_smmu_device *smmu;
+	struct device *dev;
+	__le64 *ste;
+
+	mutex_lock(&lock);
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
+	if (!dev) {
+		mutex_unlock(&lock);
+		pr_err("Failed to find device\n");
+		return -EINVAL;
+	}
+
+	master = arm_smmu_get_master(dev);
+	if (!master) {
+		put_device(dev);
+		mutex_unlock(&lock);
+		return -ENODEV;
+	}
+	smmu = master->smmu;
+
+	/* currently only support one master one sid */
+	ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
+	ste_dump(m, dev, ste);
+
+	put_device(dev);
+	mutex_unlock(&lock);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ste);
+
+static void cd_dump(struct seq_file *m, struct device *dev, __le64 *cd)
+{
+	int i;
+
+	seq_printf(m, "SMMUv3 CD values for device: %s, ssid: 0x%x\n",
+		   dev_name(dev), pasid);
+	for (i = 0; i < CTXDESC_CD_DWORDS; i++) {
+		seq_printf(m, "0x%016llx\n", *cd);
+		cd++;
+	}
+}
+
+static int cd_show(struct seq_file *m, void *unused)
+{
+	struct arm_smmu_master *master;
+	struct arm_smmu_domain *domain;
+	struct device *dev;
+	__le64 *cd;
+	int ret;
+
+	mutex_lock(&lock);
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
+	if (!dev) {
+		mutex_unlock(&lock);
+		pr_err("Failed to find device\n");
+		return -EINVAL;
+	}
+
+	master = arm_smmu_get_master(dev);
+	if (!master) {
+		ret = -ENODEV;
+		goto err_out;
+	}
+	domain = master->domain;
+
+	cd = arm_smmu_get_cd_ptr(domain, pasid);
+	if (!cd) {
+		ret = -EINVAL;
+		pr_err("Failed to find cd(ssid: %u)\n", pasid);
+		goto err_out;
+	}
+	cd_dump(m, dev, cd);
+
+	put_device(dev);
+	mutex_unlock(&lock);
+
+	return 0;
+
+err_out:
+	put_device(dev);
+	mutex_unlock(&lock);
+	return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(cd);
+
+static void __ptdump(arm_lpae_iopte *ptep, int lvl, u64 va,
+		     struct arm_lpae_io_pgtable *data, struct seq_file *m)
+{
+	arm_lpae_iopte pte, *ptep_next;
+	u64 i, tmp_va = 0;
+	int entry_num;
+
+	entry_num = 1 << (data->bits_per_level + ARM_LPAE_PGD_IDX(lvl, data));
+
+	for (i = 0; i < entry_num; i++) {
+		pte = READ_ONCE(*(ptep + i));
+		if (!pte)
+			continue;
+
+		tmp_va = va | (i << ARM_LPAE_LVL_SHIFT(lvl, data));
+
+		if (iopte_leaf(pte, lvl, data->iop.fmt)) {
+			/* To do: print prot */
+			seq_printf(m, "va: %llx -> pa: %llx\n", tmp_va,
+				   iopte_to_paddr(pte, data));
+			continue;
+		}
+
+		ptep_next = iopte_deref(pte, data);
+		__ptdump(ptep_next, lvl + 1, tmp_va, data, m);
+	}
+}
+
+static void ptdump(struct seq_file *m, struct arm_smmu_domain *domain,
+		   void *pgd, int stage)
+{
+	struct arm_lpae_io_pgtable *data, data_sva;
+	int levels, va_bits, bits_per_level;
+	struct io_pgtable_ops *ops;
+	arm_lpae_iopte *ptep = pgd;
+
+	if (stage == 1 && !pasid) {
+		ops = domain->pgtbl_ops;
+		data = io_pgtable_ops_to_data(ops);
+	} else {
+		va_bits = VA_BITS - PAGE_SHIFT;
+		bits_per_level = PAGE_SHIFT - ilog2(sizeof(arm_lpae_iopte));
+		levels = DIV_ROUND_UP(va_bits, bits_per_level);
+
+		data_sva.start_level = ARM_LPAE_MAX_LEVELS - levels;
+		data_sva.pgd_bits = va_bits - (bits_per_level * (levels - 1));
+		data_sva.bits_per_level = bits_per_level;
+		data_sva.pgd = pgd;
+
+		data = &data_sva;
+	}
+
+	__ptdump(ptep, data->start_level, 0, data, m);
+}
+
+static int pt_dump_s1_show(struct seq_file *m, void *unused)
+{
+	struct arm_smmu_master *master;
+	struct arm_smmu_domain *domain;
+	struct device *dev;
+	__le64 *cd;
+	void *pgd;
+	u64 ttbr;
+	int ret;
+
+	mutex_lock(&lock);
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
+	if (!dev) {
+		mutex_unlock(&lock);
+		pr_err("Failed to find device\n");
+		return -EINVAL;
+	}
+
+	master = arm_smmu_get_master(dev);
+	if (!master) {
+		ret = -ENODEV;
+		goto err_out;
+	}
+	domain = master->domain;
+
+	cd = arm_smmu_get_cd_ptr(domain, pasid);
+	if (!cd || !(le64_to_cpu(cd[0]) & CTXDESC_CD_0_V)) {
+		ret = -EINVAL;
+		pr_err("Failed to find valid cd(ssid: %u)\n", pasid);
+		goto err_out;
+	}
+
+	/* CD0 and other CDx are all using ttbr0 */
+	ttbr = le64_to_cpu(cd[1]) & CTXDESC_CD_1_TTB0_MASK;
+	pgd = phys_to_virt(ttbr);
+
+	if (ttbr) {
+		seq_printf(m, "SMMUv3 dump page table for device %s, stage 1, ssid 0x%x:\n",
+			   dev_name(dev), pasid);
+		ptdump(m, domain, pgd, 1);
+	}
+
+	put_device(dev);
+	mutex_unlock(&lock);
+
+	return 0;
+
+err_out:
+	put_device(dev);
+	mutex_unlock(&lock);
+	return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(pt_dump_s1);
+
+static int pt_dump_s2_show(struct seq_file *m, void *unused)
+{
+	struct arm_smmu_master *master;
+	struct arm_smmu_device *smmu;
+	struct device *dev;
+	__le64 *ste;
+	u64 vttbr;
+	void *pgd;
+	int ret;
+
+	mutex_lock(&lock);
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, dump_pci_dev);
+	if (!dev) {
+		mutex_unlock(&lock);
+		pr_err("Failed to find device\n");
+		return -EINVAL;
+	}
+
+	master = arm_smmu_get_master(dev);
+	if (!master) {
+		ret = -ENODEV;
+		goto err_out;
+	}
+	smmu = master->smmu;
+
+	/* currently only support one master one sid */
+	ste = arm_smmu_get_step_for_sid(smmu, master->sids[0]);
+	if (!(le64_to_cpu(ste[0]) & (1UL << 2))) {
+		ret = -EINVAL;
+		pr_err("Stage 2 translation is not valid\n");
+		goto err_out;
+	}
+
+	vttbr = le64_to_cpu(ste[3]) & STRTAB_STE_3_S2TTB_MASK;
+	pgd = phys_to_virt(vttbr);
+
+	if (vttbr) {
+		seq_printf(m, "SMMUv3 dump page table for device %s, stage 2:\n",
+			   dev_name(dev));
+		ptdump(m, 0, pgd, 2);
+	}
+
+	put_device(dev);
+	mutex_unlock(&lock);
+
+	return 0;
+
+err_out:
+	put_device(dev);
+	mutex_unlock(&lock);
+	return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(pt_dump_s2);
+
+void arm_smmu_debugfs_init(void)
+{
+	mutex_init(&lock);
+
+	arm_smmu_debug = debugfs_create_dir("smmuv3", iommu_debugfs_dir);
+
+	debugfs_create_file("pci_dev", 0644, arm_smmu_debug, NULL,
+			    &master_pdev_fops);
+	/*
+	 * Problem here is we need to know dump which cd, currently my idea
+	 * is we can get related pasid by smmu_bond_get trace point or by
+	 * debug interface of master device specific driver, then here we
+	 * use pasid to dump related cd.
+	 *
+	 * Or there is no need to dump page table about s1(pasid != 0) and s2
+	 * as they can be got by /proc/<pid>/pagemap.
+	 */
+	debugfs_create_u32("pasid", 0644, arm_smmu_debug, &pasid);
+
+	debugfs_create_file("ste", 0444, arm_smmu_debug, NULL, &ste_fops);
+
+	debugfs_create_file("cd", 0444, arm_smmu_debug, NULL, &cd_fops);
+
+	debugfs_create_file("pt_dump_s1", 0444, arm_smmu_debug, NULL,
+			    &pt_dump_s1_fops);
+
+	debugfs_create_file("pt_dump_s2", 0444, arm_smmu_debug, NULL,
+			    &pt_dump_s2_fops);
+}
+
+void arm_smmu_debugfs_uninit(void)
+{
+	debugfs_remove_recursive(arm_smmu_debug);
+	mutex_destroy(&lock);
+}