diff mbox series

[v2] EDAC/{i10nm,skx,skx_common}: Support UV systems

Message ID 20241213012549.43099-1-kyle.meyer@hpe.com (mailing list archive)
State New
Headers show
Series [v2] EDAC/{i10nm,skx,skx_common}: Support UV systems | expand

Commit Message

Kyle Meyer Dec. 13, 2024, 1:25 a.m. UTC
The 3-bit source IDs in PCI configuration space registers, used to map
devices to sockets, are limited to 8 unique IDs, and each ID is local to
a UPI/QPI domain.

Source IDs cannot be used to map devices to sockets on UV systems
because they can exceed 8 sockets and have multiple UPI/QPI domains with
identical, repeating source IDs.

Use NUMA information to get package IDs instead of source IDs on UV
systems, and name IMC information structures after package/source IDs
instead of node IDs.

Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
---

v1 -> v2:
 * Instead of checking for duplicate source IDs to determine if a system
   has multiple UPI/QPI domains, just check if it's a UV system.
 * https://lore.kernel.org/all/20241205165954.7957-1-kyle.meyer@hpe.com

 drivers/edac/i10nm_base.c | 11 +++------
 drivers/edac/skx_base.c   |  9 +++-----
 drivers/edac/skx_common.c | 47 +++++++++++++++++++++++++++------------
 drivers/edac/skx_common.h |  3 +--
 4 files changed, 40 insertions(+), 30 deletions(-)

Comments

Zhuo, Qiuxu Dec. 13, 2024, 5:17 a.m. UTC | #1
> From: Kyle Meyer <kyle.meyer@hpe.com>
> Sent: Friday, December 13, 2024 9:26 AM
> To: Luck, Tony <tony.luck@intel.com>; bp@alien8.de;
> james.morse@arm.com; mchehab@kernel.org; rric@kernel.org; Zhuo, Qiuxu
> <qiuxu.zhuo@intel.com>; linux-edac@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Cc: Meyer, Kyle <kyle.meyer@hpe.com>
> Subject: [PATCH v2] EDAC/{i10nm,skx,skx_common}: Support UV systems
> 
> The 3-bit source IDs in PCI configuration space registers, used to map devices
> to sockets, are limited to 8 unique IDs, and each ID is local to a UPI/QPI
> domain.
> 
> Source IDs cannot be used to map devices to sockets on UV systems because
> they can exceed 8 sockets and have multiple UPI/QPI domains with identical,
> repeating source IDs.
> 
> Use NUMA information to get package IDs instead of source IDs on UV
> systems, and use package/source IDs to name IMC information structures.
> 
> Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>

Thanks for this patch. LGTM. 
I tested it on a non-UV system and didn't observe any regressions.

Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

Thanks!
-Qiuxu
Luck, Tony Dec. 13, 2024, 7:20 p.m. UTC | #2
On Thu, Dec 12, 2024 at 07:25:49PM -0600, Kyle Meyer wrote:
> The 3-bit source IDs in PCI configuration space registers, used to map
> devices to sockets, are limited to 8 unique IDs, and each ID is local to
> a UPI/QPI domain.
> 
> Source IDs cannot be used to map devices to sockets on UV systems
> because they can exceed 8 sockets and have multiple UPI/QPI domains with
> identical, repeating source IDs.
> 
> Use NUMA information to get package IDs instead of source IDs on UV
> systems, and use package/source IDs to name IMC information structures.
> 
> Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>

Applied to RAS tree for v6.14 merge.

Thanks

-Tony
diff mbox series

Patch

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 51556c72a967..16d1110c0692 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -1010,7 +1010,7 @@  static struct notifier_block i10nm_mce_dec = {
 
 static int __init i10nm_init(void)
 {
-	u8 mc = 0, src_id = 0, node_id = 0;
+	u8 mc = 0, src_id = 0;
 	const struct x86_cpu_id *id;
 	struct res_config *cfg;
 	const char *owner;
@@ -1070,19 +1070,14 @@  static int __init i10nm_init(void)
 		if (rc < 0)
 			goto fail;
 
-		rc = skx_get_node_id(d, &node_id);
-		if (rc < 0)
-			goto fail;
-
-		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+		edac_dbg(2, "src_id = %d\n", src_id);
 		for (i = 0; i < imc_num; i++) {
 			if (!d->imc[i].mdev)
 				continue;
 
 			d->imc[i].mc  = mc++;
 			d->imc[i].lmc = i;
-			d->imc[i].src_id  = src_id;
-			d->imc[i].node_id = node_id;
+			d->imc[i].src_id = src_id;
 			if (d->imc[i].hbm_mc) {
 				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
 				d->imc[i].num_channels = cfg->hbm_chan_num;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 14cfd394b469..93f7c05faccc 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -600,7 +600,7 @@  static int __init skx_init(void)
 	const struct munit *m;
 	const char *owner;
 	int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
-	u8 mc = 0, src_id, node_id;
+	u8 mc = 0, src_id;
 	struct skx_dev *d;
 
 	edac_dbg(2, "\n");
@@ -650,15 +650,12 @@  static int __init skx_init(void)
 		rc = skx_get_src_id(d, 0xf0, &src_id);
 		if (rc < 0)
 			goto fail;
-		rc = skx_get_node_id(d, &node_id);
-		if (rc < 0)
-			goto fail;
-		edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+
+		edac_dbg(2, "src_id = %d\n", src_id);
 		for (i = 0; i < SKX_NUM_IMC; i++) {
 			d->imc[i].mc = mc++;
 			d->imc[i].lmc = i;
 			d->imc[i].src_id = src_id;
-			d->imc[i].node_id = node_id;
 			rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
 					      "Skylake Socket", EDAC_MOD_STR,
 					      skx_get_dimm_config, cfg);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 6cf17af7d911..f7bd930e058f 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -19,6 +19,7 @@ 
 #include <linux/adxl.h>
 #include <acpi/nfit.h>
 #include <asm/mce.h>
+#include <asm/uv/uv.h>
 #include "edac_module.h"
 #include "skx_common.h"
 
@@ -221,33 +222,51 @@  void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 }
 EXPORT_SYMBOL_GPL(skx_set_decode);
 
-int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
+static int skx_get_pkg_id(struct skx_dev *d, u8 *id)
 {
-	u32 reg;
+	int node;
+	int cpu;
 
-	if (pci_read_config_dword(d->util_all, off, &reg)) {
-		skx_printk(KERN_ERR, "Failed to read src id\n");
-		return -ENODEV;
+	node = pcibus_to_node(d->util_all->bus);
+	if (numa_valid_node(node)) {
+		for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) {
+			struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+			if (c->initialized && cpu_to_node(cpu) == node) {
+				*id = c->topo.pkg_id;
+				return 0;
+			}
+		}
 	}
 
-	*id = GET_BITFIELD(reg, 12, 14);
-	return 0;
+	skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n");
+	return -ENODEV;
 }
-EXPORT_SYMBOL_GPL(skx_get_src_id);
 
-int skx_get_node_id(struct skx_dev *d, u8 *id)
+int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
 {
 	u32 reg;
 
-	if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
-		skx_printk(KERN_ERR, "Failed to read node id\n");
+	/*
+	 * The 3-bit source IDs in PCI configuration space registers are limited
+	 * to 8 unique IDs, and each ID is local to a UPI/QPI domain.
+	 *
+	 * Source IDs cannot be used to map devices to sockets on UV systems
+	 * because they can exceed 8 sockets and have multiple UPI/QPI domains
+	 * with identical, repeating source IDs.
+	 */
+	if (is_uv_system())
+		return skx_get_pkg_id(d, id);
+
+	if (pci_read_config_dword(d->util_all, off, &reg)) {
+		skx_printk(KERN_ERR, "Failed to read src id\n");
 		return -ENODEV;
 	}
 
-	*id = GET_BITFIELD(reg, 0, 2);
+	*id = GET_BITFIELD(reg, 12, 14);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(skx_get_node_id);
+EXPORT_SYMBOL_GPL(skx_get_src_id);
 
 static int get_width(u32 mtr)
 {
@@ -507,7 +526,7 @@  int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 	pvt->imc = imc;
 
 	mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
-				  imc->node_id, imc->lmc);
+				  imc->src_id, imc->lmc);
 	if (!mci->ctl_name) {
 		rc = -ENOMEM;
 		goto fail0;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 54bba8a62f72..b0845bdd4516 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -103,7 +103,7 @@  struct skx_dev {
 		bool hbm_mc;
 		u8 mc;	/* system wide mc# */
 		u8 lmc;	/* socket relative mc# */
-		u8 src_id, node_id;
+		u8 src_id;
 		struct skx_channel {
 			struct pci_dev	*cdev;
 			struct pci_dev	*edev;
@@ -244,7 +244,6 @@  void skx_set_mem_cfg(bool mem_cfg_2lm);
 void skx_set_res_cfg(struct res_config *cfg);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
-int skx_get_node_id(struct skx_dev *d, u8 *id);
 
 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);