diff mbox series

[2/3] ampere/arm64: Work around Ampere Altra erratum #82288 PCIE_65

Message ID 20240827130829.43632-3-alex.bennee@linaro.org (mailing list archive)
State New, archived
Headers show
Series altra workarounds for PCIE_64 with instrumentation | expand

Commit Message

Alex Bennée Aug. 27, 2024, 1:08 p.m. UTC
From: D Scott Phillips <scott@os.amperecomputing.com>

Altra's PCIe controller may generate incorrect addresses when receiving
writes from the CPU with a discontiguous set of byte enables. Attempt to
work around this by handing out Device-nGnRE maps instead of Normal
Non-cacheable maps for PCIe memory areas.

Upstream-Status: Pending
Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 arch/arm64/Kconfig               | 22 +++++++++++++++++++++-
 arch/arm64/include/asm/io.h      |  3 +++
 arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++++++++++-----
 arch/arm64/mm/ioremap.c          | 27 +++++++++++++++++++++++++++
 drivers/pci/quirks.c             |  9 +++++++++
 include/asm-generic/io.h         |  4 ++++
 mm/ioremap.c                     |  2 +-
 7 files changed, 87 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b3fc891f15442..01adb50df214e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -440,6 +440,27 @@  config AMPERE_ERRATUM_AC03_CPU_38
 config ARM64_WORKAROUND_CLEAN_CACHE
 	bool
 
+config ALTRA_ERRATUM_82288
+	bool "Ampere Altra: 82288: PCIE_65: PCIe Root Port outbound write combining issue"
+	default y
+	help
+	  This option adds an alternative code sequence to work around
+	  Ampere Altra erratum 82288.
+
+	  PCIe device drivers may map MMIO space as Normal, non-cacheable
+	  memory attribute (e.g. Linux kernel drivers mapping MMIO
+	  using ioremap_wc). This may be for the purpose of enabling write
+	  combining or unaligned accesses. This can result in data corruption
+	  on the PCIe interface’s outbound MMIO writes due to issues with the
+	  write-combining operation.
+
+	  The workaround modifies software that maps PCIe MMIO space as Normal,
+	  non-cacheable memory (e.g. ioremap_wc) to instead Device,
+	  non-gatheringmemory (e.g. ioremap). And all memory operations on PCIe
+	  MMIO space must be strictly aligned.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_826319
 	bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
 	default y
@@ -2388,4 +2409,3 @@  endmenu # "CPU Power Management"
 source "drivers/acpi/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
-
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 41fd90895dfc3..403b65f2f44de 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -273,6 +273,9 @@  __iowrite64_copy(void __iomem *to, const void *from, size_t count)
 
 #define ioremap_prot ioremap_prot
 
+pgprot_t ioremap_map_prot(phys_addr_t phys_addr, size_t size, unsigned long prot);
+#define ioremap_map_prot ioremap_map_prot
+
 #define _PAGE_IOREMAP PROT_DEVICE_nGnRE
 
 #define ioremap_wc(addr, size)	\
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7a4f5604be3f7..f4603924390eb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -236,11 +236,6 @@  static inline pte_t pte_mkyoung(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_AF));
 }
 
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
-}
-
 static inline pte_t pte_mkcont(pte_t pte)
 {
 	pte = set_pte_bit(pte, __pgprot(PTE_CONT));
@@ -682,6 +677,28 @@  static inline bool pud_table(pud_t pud) { return true; }
 				 PUD_TYPE_TABLE)
 #endif
 
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+extern bool __read_mostly have_altra_erratum_82288;
+#endif
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+	phys_addr_t phys = __pte_to_phys(pte);
+	pgprot_t prot = __pgprot(pte_val(pte) & ~PTE_ADDR_LOW);
+
+	if (unlikely(have_altra_erratum_82288) &&
+	    (phys < 0x80000000 ||
+	     (phys >= 0x200000000000 && phys < 0x400000000000) ||
+	     (phys >= 0x600000000000 && phys < 0x800000000000))) {
+		pte = __pte(__phys_to_pte_val(phys) | pgprot_val(pgprot_device(prot)));
+	}
+#endif
+
+	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
+}
+
+
 extern pgd_t init_pg_dir[];
 extern pgd_t init_pg_end[];
 extern pgd_t swapper_pg_dir[];
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 269f2f63ab7dc..8965766181359 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -3,6 +3,33 @@ 
 #include <linux/mm.h>
 #include <linux/io.h>
 
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+
+bool have_altra_erratum_82288 __read_mostly;
+EXPORT_SYMBOL(have_altra_erratum_82288);
+
+static bool is_altra_pci(phys_addr_t phys_addr, size_t size)
+{
+	phys_addr_t end = phys_addr + size;
+
+	return (phys_addr < 0x80000000 ||
+		(end > 0x200000000000 && phys_addr < 0x400000000000) ||
+		(end > 0x600000000000 && phys_addr < 0x800000000000));
+}
+#endif
+
+pgprot_t ioremap_map_prot(phys_addr_t phys_addr, size_t size,
+                          unsigned long prot_val)
+{
+	pgprot_t prot = __pgprot(prot_val);
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+	if (unlikely(have_altra_erratum_82288 && is_altra_pci(phys_addr, size))) {
+		prot = pgprot_device(prot);
+	}
+#endif
+	return prot;
+}
+
 void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 			   unsigned long prot)
 {
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a2ce4e08edf5a..8baf90ee3357c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -6234,6 +6234,15 @@  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size);
 #endif
 
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+static void quirk_altra_erratum_82288(struct pci_dev *dev)
+{
+	pr_info_once("Write combining PCI maps disabled due to hardware erratum\n");
+	have_altra_erratum_82288 = true;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMPERE, 0xe100, quirk_altra_erratum_82288);
+#endif
+
 /*
  * For a PCI device with multiple downstream devices, its driver may use
  * a flattened device tree to describe the downstream devices.
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 80de699bf6af4..75670d7094537 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -1047,6 +1047,10 @@  static inline void iounmap(volatile void __iomem *addr)
 #elif defined(CONFIG_GENERIC_IOREMAP)
 #include <linux/pgtable.h>
 
+#ifndef ioremap_map_prot
+#define ioremap_map_prot(phys_addr, size, prot) __pgprot(prot)
+#endif
+
 void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size,
 				   pgprot_t prot);
 
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 3e049dfb28bd0..a4e6950682f33 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -52,7 +52,7 @@  void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size,
 void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 			   unsigned long prot)
 {
-	return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+	return generic_ioremap_prot(phys_addr, size, ioremap_map_prot(phys_addr, size, prot));
 }
 EXPORT_SYMBOL(ioremap_prot);
 #endif