@@ -1121,6 +1121,20 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
}
}
+static void debug_display_dimm_sizes_gpu(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs = 0, cs_mode;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
static void __dump_misc_regs_df(struct amd64_pvt *pvt)
{
struct amd64_umc *umc;
@@ -1165,6 +1179,27 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
pvt->dhar, dhar_base(pvt));
}
+static void __dump_misc_regs_gpu(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ debug_display_dimm_sizes_gpu(pvt, i);
+ }
+}
+
/* Display and decode various NB registers for debug purposes. */
static void __dump_misc_regs(struct amd64_pvt *pvt)
{
@@ -1245,6 +1280,43 @@ static void f17_prep_chip_selects(struct amd64_pvt *pvt)
}
}
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static void read_umc_base_mask_gpu(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = get_umc_base_gpu(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = get_umc_base_gpu(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
static void read_umc_base_mask(struct amd64_pvt *pvt)
{
u32 umc_base_reg, umc_base_reg_sec;
@@ -1743,6 +1815,19 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
return channels;
}
+static int gpu_early_channel_count(struct amd64_pvt *pvt)
+{
+ int i, channels = 0;
+
+ /* The memory channels on GPU nodes are fully populated. */
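+ /* E.g., 4 UMCs with 8 chip selects each, as set up for Aldebaran here, give 32 channels. */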
+ for_each_umc(i)
+ channels += pvt->csels[i].b_cnt;
+
+ amd64_info("MCT channel count: %d\n", channels);
+
+ return channels;
+}
+
static int ddr3_cs_size(unsigned i, bool dct_width)
{
unsigned shift = 0;
@@ -1870,11 +1955,46 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
return ddr3_cs_size(cs_mode, false);
}
+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
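+
+ /*
+ * Worked example with illustrative values: addr_mask_orig = 0x7fffffe
+ * with no interleaving gives msb = 26 and weight = 26, so no zero bits
+ * are dropped; the deinterleaved mask stays 0x7fffffe and the size
+ * below works out to 0x2000000 kB, i.e. 32768 MB.
+ */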
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
+ u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1902,33 +2022,15 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[dimm];
- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+}
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void read_dram_ctl_register(struct amd64_pvt *pvt)
@@ -2516,8 +2618,11 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
/* Prototypes for family specific ops routines */
static int init_csrows(struct mem_ctl_info *mci);
static int init_csrows_df(struct mem_ctl_info *mci);
+static int init_csrows_gpu(struct mem_ctl_info *mci);
static void __read_mc_regs_df(struct amd64_pvt *pvt);
+static void __read_mc_regs_gpu(struct amd64_pvt *pvt);
static void update_umc_err_info(struct mce *m, struct err_info *err);
+static void update_umc_err_info_gpu(struct mce *m, struct err_info *err);
static const struct low_ops k8_ops = {
.early_channel_count = k8_early_channel_count,
@@ -2590,6 +2695,17 @@ static const struct low_ops f17_ops = {
.get_umc_err_info = update_umc_err_info,
};
+static const struct low_ops gpu_ops = {
+ .early_channel_count = gpu_early_channel_count,
+ .dbam_to_cs = gpu_addr_mask_to_cs_size,
+ .prep_chip_select = gpu_prep_chip_selects,
+ .get_base_mask = read_umc_base_mask_gpu,
+ .display_misc_regs = __dump_misc_regs_gpu,
+ .get_mc_regs = __read_mc_regs_gpu,
+ .populate_csrows = init_csrows_gpu,
+ .get_umc_err_info = update_umc_err_info_gpu,
+};
+
static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
.ctl_name = "K8",
@@ -2682,6 +2798,14 @@ static struct amd64_family_type family_types[] = {
.max_mcs = 8,
.ops = f17_ops,
},
+ [ALDEBARAN_GPUS] = {
+ .ctl_name = "ALDEBARAN",
+ .f0_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6,
+ .max_mcs = 4,
+ .ops = gpu_ops,
+ },
};
/*
@@ -2940,12 +3064,38 @@ static void update_umc_err_info(struct mce *m, struct err_info *err)
err->csrow = m->synd & 0x7;
}
+/*
+ * The CPUs have one channel per UMC, so the UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values map to a single
+ * UMC, and the channel numbers are split into two groups of four.
+ *
+ * See the comment on get_umc_base_gpu() in amd64_edac.h.
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void update_umc_err_info_gpu(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
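+ /*
+ * Illustrative decode, assuming the layout described above:
+ * ipid[31:20] = 3 (odd) with ipid[15:12] = 2 yields channel 6
+ * (the second group of four) and csrow 2.
+ */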
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
static void decode_umc_error(int node_id, struct mce *m)
{
u8 ecc_type = (m->status >> 45) & 0x3;
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
struct err_info err;
+ u8 df_inst_id;
u64 sys_addr;
mci = edac_mc_find(node_id);
@@ -2975,7 +3125,17 @@ static void decode_umc_error(int node_id, struct mce *m)
pvt->ops->get_umc_err_info(m, &err);
- if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ /*
+ * A GPU node has #phys[X], each of which has #channels[Y].
+ * On GPUs, df_inst_id = [X] * num_ch_per_phy + [Y].
+ * On CPUs, "Channel"="UMC Number"="DF Instance ID".
+ */
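+ /*
+ * Illustration, assuming the Aldebaran layout set up in this patch
+ * (4 UMCs with 8 chip selects each): channel_count = 32 and
+ * nr_csrows = 4, so the GPU case computes
+ * df_inst_id = err.csrow * 8 + err.channel.
+ */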
+ if (pvt->is_gpu)
+ df_inst_id = (err.csrow * pvt->channel_count / mci->nr_csrows) + err.channel;
+ else
+ df_inst_id = err.channel;
+
+ if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, df_inst_id, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
goto log_error;
}
@@ -3114,6 +3274,23 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
}
}
+static void __read_mc_regs_gpu(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = get_umc_base_gpu(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
/*
* Retrieve the hardware registers of the memory controller (this includes the
* 'Address Map' and 'Misc' device regs)
@@ -3196,7 +3373,9 @@ static void read_mc_regs(struct amd64_pvt *pvt)
determine_memory_type(pvt);
edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
- determine_ecc_sym_sz(pvt);
+ /* ECC symbol size is not available on GPU nodes */
+ if (!pvt->is_gpu)
+ determine_ecc_sym_sz(pvt);
}
/*
@@ -3243,7 +3422,10 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
csrow_nr >>= 1;
cs_mode = DBAM_DIMM(csrow_nr, dbam);
} else {
- cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
+ if (pvt->is_gpu)
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+ else
+ cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
}
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
@@ -3300,6 +3482,35 @@ static int init_csrows_df(struct mem_ctl_info *mci)
return empty;
}
+static int init_csrows_gpu(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ int empty = 1;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ empty = 0;
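+ /*
+ * On GPU nodes the layers are swapped relative to CPUs: UMCs make
+ * up the csrow layer and chip selects make up the channel layer
+ * (see the layer setup in init_one_instance()).
+ */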
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+ dimm->mtype = MEM_HBM2;
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+
+ return empty;
+}
+
/*
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
@@ -3541,6 +3752,10 @@ static bool ecc_enabled(struct amd64_pvt *pvt)
u8 ecc_en = 0, i;
u32 value;
+ /* ECC is enabled by default on GPU nodes */
+ if (pvt->is_gpu)
+ return true;
+
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;
struct amd64_umc *umc;
@@ -3624,7 +3839,10 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
mci->edac_ctl_cap = EDAC_FLAG_NONE;
if (pvt->umc) {
- f17h_determine_edac_ctl_cap(mci, pvt);
+ if (pvt->is_gpu)
+ mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+ else
+ f17h_determine_edac_ctl_cap(mci, pvt);
} else {
if (pvt->nbcap & NBCAP_SECDED)
mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
@@ -3726,10 +3944,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
pvt->ops = &family_types[F17_M70H_CPUS].ops;
pvt->fam_type->ctl_name = "F19h_M20h";
break;
+ } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
+ if (pvt->mc_node_id >= amd_cpu_node_count()) {
+ pvt->fam_type = &family_types[ALDEBARAN_GPUS];
+ pvt->ops = &family_types[ALDEBARAN_GPUS].ops;
+ pvt->is_gpu = true;
+ } else {
+ pvt->fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h_M30h";
+ }
+ } else {
+ pvt->fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h";
}
- pvt->fam_type = &family_types[F19_CPUS];
- pvt->ops = &family_types[F19_CPUS].ops;
- family_types[F19_CPUS].ctl_name = "F19h";
break;
default:
@@ -3811,9 +4040,10 @@ static int init_one_instance(struct amd64_pvt *pvt)
if (pvt->channel_count < 0)
return ret;
+ /* Define layers for CPU and GPU nodes */
ret = -ENOMEM;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = pvt->is_gpu ? pvt->fam_type->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
@@ -3822,7 +4052,7 @@ static int init_one_instance(struct amd64_pvt *pvt)
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
*/
- layers[1].size = pvt->fam_type->max_mcs;
+ layers[1].size = pvt->is_gpu ? pvt->csels[0].b_cnt : pvt->fam_type->max_mcs;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
@@ -126,6 +126,8 @@
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0 0x14d0
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6 0x14d6
/*
* Function 1 - Address Map
@@ -298,6 +300,7 @@ enum amd_families {
F17_M60H_CPUS,
F17_M70H_CPUS,
F19_CPUS,
+ ALDEBARAN_GPUS,
NUM_FAMILIES,
};
@@ -391,6 +394,8 @@ struct amd64_pvt {
struct amd64_umc *umc; /* UMC registers */
struct amd64_family_type *fam_type;
+
+ bool is_gpu;
};
enum err_codes {
@@ -412,6 +417,28 @@ struct err_info {
u32 offset;
};
+static inline u32 get_umc_base_gpu(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes, the channel is selected in the 6th nibble:
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes, the channel is selected in the 3rd nibble:
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
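+ /* E.g., UMC 1 channel 6: umc has been adjusted to 3 here, giving 0x00352000. */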
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
static inline u32 get_umc_base(u8 channel)
{
/* chY: 0xY50000 */